@chainlesschain/personal-data-hub 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +241 -0
- package/__tests__/adapter-spec.test.js +78 -0
- package/__tests__/adapters/email-adapter.test.js +605 -0
- package/__tests__/adapters/email-imap-session.test.js +334 -0
- package/__tests__/adapters/email-parser.test.js +244 -0
- package/__tests__/adapters/email-providers.test.js +84 -0
- package/__tests__/analysis.test.js +302 -0
- package/__tests__/batch.test.js +133 -0
- package/__tests__/bridges-cc-kg.test.js +231 -0
- package/__tests__/bridges-cc-llm.test.js +191 -0
- package/__tests__/bridges-cc-rag.test.js +162 -0
- package/__tests__/ids.test.js +45 -0
- package/__tests__/key-providers.test.js +126 -0
- package/__tests__/kg-derive.test.js +219 -0
- package/__tests__/llm-client.test.js +122 -0
- package/__tests__/mock-adapter.test.js +93 -0
- package/__tests__/prompt-builder.test.js +204 -0
- package/__tests__/query-parser.test.js +150 -0
- package/__tests__/rag-derive.test.js +169 -0
- package/__tests__/registry.test.js +304 -0
- package/__tests__/schemas.test.js +331 -0
- package/__tests__/vault.test.js +506 -0
- package/lib/adapter-spec.js +155 -0
- package/lib/adapters/email-imap/email-adapter.js +398 -0
- package/lib/adapters/email-imap/email-parser.js +177 -0
- package/lib/adapters/email-imap/imap-session.js +294 -0
- package/lib/adapters/email-imap/index.js +26 -0
- package/lib/adapters/email-imap/providers.js +111 -0
- package/lib/analysis.js +226 -0
- package/lib/batch.js +123 -0
- package/lib/bridges/cc-kg-sink.js +264 -0
- package/lib/bridges/cc-llm-adapter.js +169 -0
- package/lib/bridges/cc-rag-sink.js +118 -0
- package/lib/bridges/index.js +44 -0
- package/lib/constants.js +92 -0
- package/lib/ids.js +103 -0
- package/lib/index.js +141 -0
- package/lib/key-providers.js +146 -0
- package/lib/kg-derive.js +214 -0
- package/lib/llm-client.js +171 -0
- package/lib/migrations.js +246 -0
- package/lib/mock-adapter.js +199 -0
- package/lib/prompt-builder.js +205 -0
- package/lib/query-parser.js +250 -0
- package/lib/rag-derive.js +186 -0
- package/lib/registry.js +398 -0
- package/lib/schemas.js +379 -0
- package/lib/vault.js +883 -0
- package/package.json +63 -0
- package/vitest.config.js +10 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thin async-iterator wrapper around imapflow.
|
|
3
|
+
*
|
|
4
|
+
* The hub never imports imapflow directly — this file is the single
|
|
5
|
+
* static require boundary. EmailAdapter takes an ImapSession instance
|
|
6
|
+
* (or any object with the same surface) so tests can inject a mock
|
|
7
|
+
* without imapflow even being installed.
|
|
8
|
+
*
|
|
9
|
+
* Errors are surfaced with normalized `code`:
|
|
10
|
+
* AUTH_FAILED login rejected
|
|
11
|
+
* CONNECTION_FAILED TCP / TLS failure
|
|
12
|
+
* MAILBOX_NOT_FOUND folder doesn't exist
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
"use strict";
|
|
16
|
+
|
|
17
|
+
/**
 * Error raised when the IMAP login is rejected.
 *
 * Instances carry the normalized `code` "AUTH_FAILED" so callers can
 * branch on the code instead of parsing messages.
 */
class ImapAuthFailedError extends Error {
  /**
   * @param {string} [message] Human-readable text; a generic default is used when falsy.
   * @param {*} [cause] Underlying error; attached as `cause` only when truthy.
   */
  constructor(message, cause) {
    const text = message ? message : "IMAP authentication failed";
    super(text);
    Object.assign(this, { name: "ImapAuthFailedError", code: "AUTH_FAILED" });
    if (cause) {
      this.cause = cause;
    }
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Error raised when the IMAP connection cannot be established.
 *
 * Instances carry the normalized `code` "CONNECTION_FAILED".
 */
class ImapConnectionFailedError extends Error {
  /**
   * @param {string} [message] Human-readable text; a generic default is used when falsy.
   * @param {*} [cause] Underlying error; attached as `cause` only when truthy.
   */
  constructor(message, cause) {
    const text = message ? message : "IMAP connection failed";
    super(text);
    Object.assign(this, { name: "ImapConnectionFailedError", code: "CONNECTION_FAILED" });
    if (cause) {
      this.cause = cause;
    }
  }
}
|
|
34
|
+
|
|
35
|
+
/**
 * Error raised when a requested mailbox (folder) does not exist.
 *
 * Instances carry the normalized `code` "MAILBOX_NOT_FOUND" and expose the
 * requested folder name as `mailbox`.
 */
class ImapMailboxNotFoundError extends Error {
  /**
   * @param {string} name Name of the mailbox that could not be opened.
   */
  constructor(name) {
    const text = `Mailbox not found: ${name}`;
    super(text);
    Object.assign(this, {
      name: "ImapMailboxNotFoundError",
      code: "MAILBOX_NOT_FOUND",
      mailbox: name,
    });
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Session wrapper around one IMAP client connection.
 *
 * The client is either built from `opts.imapFlowFactory` (used by tests to
 * inject a mock) or from the lazily required `imapflow` package. All fetch
 * methods are async generators so callers can stream large mailboxes.
 */
class ImapSession {
  /**
   * @param {object} opts
   * @param {string} opts.host IMAP server host.
   * @param {number|string} opts.port IMAP server port (coerced with Number()).
   * @param {string} opts.user Login name.
   * @param {string} opts.authCode Authorization code passed to the client as the password.
   * @param {boolean} [opts.secure=true] Anything other than `false` enables TLS.
   * @param {number} [opts.connectTimeoutMs=15000] Connect timeout in milliseconds.
   * @param {Function} [opts.imapFlowFactory] Constructor or plain factory returning a client.
   * @throws {Error} When opts is missing or a required field is undefined, null or "".
   */
  constructor(opts) {
    if (!opts || typeof opts !== "object") {
      throw new Error("ImapSession: opts required");
    }
    for (const f of ["host", "port", "user", "authCode"]) {
      if (opts[f] === undefined || opts[f] === null || opts[f] === "") {
        throw new Error(`ImapSession: opts.${f} required`);
      }
    }
    this.host = String(opts.host);
    this.port = Number(opts.port);
    this.secure = opts.secure !== false;
    this.user = String(opts.user);
    this.authCode = String(opts.authCode);
    this.connectTimeoutMs = Number.isFinite(opts.connectTimeoutMs)
      ? opts.connectTimeoutMs
      : 15000;
    this._factory = typeof opts.imapFlowFactory === "function" ? opts.imapFlowFactory : null;
    this._client = null;
  }

  /**
   * Build a client and connect it, racing the connect against a timeout.
   *
   * If the session already holds a client, that client is closed first so a
   * reconnect never orphans an open connection.
   *
   * @returns {Promise<void>}
   * @throws {ImapAuthFailedError} When the failure looks like a rejected login.
   * @throws {ImapConnectionFailedError} For every other construction/connect failure or timeout.
   */
  async connect() {
    if (this._client) {
      await this.close();
    }

    let ImapFlowCtor;
    if (this._factory) {
      ImapFlowCtor = this._factory;
    } else {
      try {
        const mod = require("imapflow");
        ImapFlowCtor = mod.ImapFlow || mod.default || mod;
      } catch (err) {
        throw new ImapConnectionFailedError(
          "imapflow is not installed. Run `npm install imapflow` in the workspace.",
          err
        );
      }
    }

    const ctorOpts = {
      host: this.host,
      port: this.port,
      secure: this.secure,
      auth: { user: this.user, pass: this.authCode },
      logger: false,
    };
    let client;
    try {
      // Real imapflow's ImapFlow is an ES class → must use `new`. Test
      // injection sometimes passes an arrow factory which `new` rejects
      // (TypeError: not a constructor). Try constructor first, fall
      // through to plain call so both shapes work.
      try {
        client = new ImapFlowCtor(ctorOpts);
      } catch (ctorErr) {
        if (ctorErr instanceof TypeError) {
          client = ImapFlowCtor(ctorOpts);
        } else {
          throw ctorErr;
        }
      }
    } catch (err) {
      throw new ImapConnectionFailedError(
        `Failed to construct IMAP client: ${err && err.message ? err.message : err}`,
        err
      );
    }

    let connectPromise;
    try {
      connectPromise = client.connect();
    } catch (err) {
      throw new ImapConnectionFailedError(
        `IMAP connect threw synchronously: ${err && err.message ? err.message : err}`,
        err
      );
    }

    let timer = null;
    const timeoutPromise = new Promise((_, reject) => {
      timer = setTimeout(() => {
        reject(new ImapConnectionFailedError(
          `IMAP connect timed out after ${this.connectTimeoutMs}ms`
        ));
      }, this.connectTimeoutMs);
    });

    try {
      await Promise.race([connectPromise, timeoutPromise]);
    } catch (err) {
      // Best-effort teardown of the half-open client; its own failure is irrelevant here.
      try { await client.close(); } catch (_e) {}
      throw this._normalizeConnectError(err);
    } finally {
      if (timer) clearTimeout(timer);
    }

    this._client = client;
  }

  /**
   * Map a raw connect failure onto one of the normalized error classes.
   *
   * Errors that already carry a normalized `code` are returned untouched;
   * this runs BEFORE the message heuristic so that e.g. a CONNECTION_FAILED
   * error whose text contains "rejected" is not re-labelled as an auth error.
   *
   * @param {*} err
   * @returns {Error}
   */
  _normalizeConnectError(err) {
    if (err && (err.code === "AUTH_FAILED" || err.code === "CONNECTION_FAILED")) {
      return err;
    }
    const msg = (err && err.message ? err.message : String(err)).toLowerCase();
    // NOTE(review): imapflow is documented to flag login failures with
    // `authenticationFailed` — confirm against the installed version. The
    // keyword match stays as a fallback for other client shapes.
    const looksLikeAuthFailure =
      Boolean(err && err.authenticationFailed === true) ||
      msg.includes("auth") ||
      msg.includes("invalid credentials") ||
      msg.includes("login") ||
      msg.includes("rejected");
    if (looksLikeAuthFailure) {
      return new ImapAuthFailedError(err && err.message, err);
    }
    return new ImapConnectionFailedError(
      `IMAP connect failed: ${err && err.message ? err.message : err}`,
      err
    );
  }

  /**
   * @returns {object} The connected client.
   * @throws {Error} When connect() has not completed successfully.
   */
  _requireConnected() {
    if (!this._client) {
      throw new Error("ImapSession: not connected; call connect() first.");
    }
    return this._client;
  }

  /**
   * Open a mailbox and return its sync-relevant counters.
   *
   * @param {string} name Non-empty mailbox path.
   * @returns {Promise<{uidValidity: *, uidNext: *, exists: *}>}
   * @throws {ImapMailboxNotFoundError} When the client's error text indicates a missing folder.
   * @throws {Error} When not connected, when `name` is invalid, or any other client error (rethrown as-is).
   */
  async openMailbox(name) {
    const c = this._requireConnected();
    if (typeof name !== "string" || !name) {
      throw new Error("openMailbox: name must be a non-empty string");
    }
    let info;
    try {
      info = await c.mailboxOpen(name);
    } catch (err) {
      const msg = (err && err.message ? err.message : "").toLowerCase();
      if (msg.includes("doesn't exist") || msg.includes("nonexistent") || msg.includes("not found")) {
        throw new ImapMailboxNotFoundError(name);
      }
      throw err;
    }
    return {
      uidValidity: info.uidValidity,
      uidNext: info.uidNext,
      exists: info.exists,
    };
  }

  /**
   * List mailboxes, reduced to the fields the hub uses.
   *
   * @returns {Promise<Array<{name: *, path: *, specialUse: *, flags: Array}>>}
   *   Empty array when the client does not return an array.
   */
  async listMailboxes() {
    const c = this._requireConnected();
    const items = await c.list();
    if (!Array.isArray(items)) return [];
    return items.map((m) => ({
      name: m.name,
      path: m.path,
      specialUse: m.specialUse || null,
      flags: Array.isArray(m.flags) ? m.flags : [],
    }));
  }

  /**
   * Shared UID-range fetch used by both public fetch generators.
   *
   * Yields the raw client messages whose UID is strictly greater than
   * `sinceUid`. Per the IMAP specification a UID range `N:*` always includes
   * the last message in the mailbox even when its UID is below N, so without
   * the explicit filter the watermark message would be yielded again on every
   * sync that finds nothing new.
   *
   * @param {number} sinceUid Watermark; non-finite or non-positive values mean "from the start".
   * @param {object} fields Fetch query object handed to the client.
   * @returns {AsyncGenerator<object>}
   */
  async *_fetchSince(sinceUid, fields) {
    const c = this._requireConnected();
    // Floor so a fractional watermark cannot produce an invalid set like "2.5:*".
    const baseUid = Number.isFinite(sinceUid) && sinceUid > 0 ? Math.floor(sinceUid) : 0;
    const range = `${baseUid + 1}:*`;
    const iter = c.fetch(range, fields, { uid: true });
    for await (const msg of iter) {
      if (msg && typeof msg.uid === "number" && msg.uid <= baseUid) continue;
      yield msg;
    }
  }

  /**
   * Stream envelope rows for every message with UID greater than `sinceUid`.
   *
   * @param {number} [sinceUid=0]
   * @returns {AsyncGenerator<object>} Rows shaped by `_toEnvelopeRow`.
   */
  async *fetchEnvelopesSince(sinceUid = 0) {
    const fields = { envelope: true, internalDate: true, flags: true, size: true, uid: true };
    for await (const msg of this._fetchSince(sinceUid, fields)) {
      yield this._toEnvelopeRow(msg);
    }
  }

  /**
   * Like fetchEnvelopesSince but also pulls the full RFC822 source of
   * each message (`source: true`). Phase 5.2 — the EmailAdapter feeds
   * these into the mailparser-based email-parser to extract body text,
   * HTML, attachments metadata, etc.
   *
   * Memory note: source bytes pile up in memory until each generator
   * consumer awaits the next iteration. For huge mailboxes the registry's
   * batchSize (default 100) acts as the natural back-pressure — every
   * `batchSize` messages get committed to vault before the next batch
   * pulls. Phase 5.5 PDF-decryption work will switch to per-attachment
   * download for emails > N MB.
   *
   * Yields `{ ...envelopeRow, source: Buffer }`.
   *
   * @param {number} sinceUid
   * @returns {AsyncGenerator}
   */
  async *fetchFullSince(sinceUid = 0) {
    const fields = {
      envelope: true,
      internalDate: true,
      flags: true,
      size: true,
      uid: true,
      source: true, // raw RFC822 bytes
    };
    for await (const msg of this._fetchSince(sinceUid, fields)) {
      const row = this._toEnvelopeRow(msg);
      // imapflow returns source as a Buffer; defensively coerce.
      row.source = Buffer.isBuffer(msg.source)
        ? msg.source
        : msg.source
          ? Buffer.from(msg.source)
          : Buffer.alloc(0);
      yield row;
    }
  }

  /**
   * Log out and close the client. Both steps are best-effort: their errors
   * are deliberately ignored so close() never throws. No-op when not connected.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (!this._client) return;
    try {
      await this._client.logout();
    } catch (_err) {}
    try {
      await this._client.close();
    } catch (_err) {}
    this._client = null;
  }

  /**
   * Flatten a fetched client message into a plain envelope row.
   *
   * @param {object} msg Raw message from the client's fetch iterator.
   * @returns {{uid: *, internalDate: Date, flags: Array, messageId: string,
   *   subject: string, from: Array, to: Array, cc: Array, date: (Date|null), size: number}}
   */
  _toEnvelopeRow(msg) {
    const env = msg.envelope || {};
    // imapflow returns flags as a Set; older shapes use Array. Cover both.
    let flags = [];
    if (msg.flags) {
      if (msg.flags instanceof Set || Array.isArray(msg.flags) || typeof msg.flags[Symbol.iterator] === "function") {
        flags = Array.from(msg.flags);
      }
    }
    return {
      uid: msg.uid,
      internalDate: msg.internalDate instanceof Date ? msg.internalDate : new Date(msg.internalDate || 0),
      flags,
      messageId: typeof env.messageId === "string" ? env.messageId : "",
      subject: typeof env.subject === "string" ? env.subject : "",
      from: this._addrs(env.from),
      to: this._addrs(env.to),
      cc: this._addrs(env.cc),
      date: env.date instanceof Date ? env.date : env.date ? new Date(env.date) : null,
      size: typeof msg.size === "number" ? msg.size : 0,
    };
  }

  /**
   * Normalize an envelope address list.
   *
   * @param {*} list
   * @returns {Array<{name: (string|undefined), address: string}>} Empty array for non-array input.
   */
  _addrs(list) {
    if (!Array.isArray(list)) return [];
    return list.map((a) => ({
      name: a && a.name ? String(a.name) : undefined,
      address: a && a.address ? String(a.address) : "",
    }));
  }
}
|
|
288
|
+
|
|
289
|
+
module.exports = {
|
|
290
|
+
ImapSession,
|
|
291
|
+
ImapAuthFailedError,
|
|
292
|
+
ImapConnectionFailedError,
|
|
293
|
+
ImapMailboxNotFoundError,
|
|
294
|
+
};
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const { EmailAdapter, parseWatermark, formatWatermark, NAME, VERSION } = require("./email-adapter");
|
|
4
|
+
const { PROVIDERS, resolveProvider } = require("./providers");
|
|
5
|
+
const {
|
|
6
|
+
ImapSession,
|
|
7
|
+
ImapAuthFailedError,
|
|
8
|
+
ImapConnectionFailedError,
|
|
9
|
+
ImapMailboxNotFoundError,
|
|
10
|
+
} = require("./imap-session");
|
|
11
|
+
const { parseRawEmail } = require("./email-parser");
|
|
12
|
+
|
|
13
|
+
module.exports = {
|
|
14
|
+
EmailAdapter,
|
|
15
|
+
EMAIL_ADAPTER_NAME: NAME,
|
|
16
|
+
EMAIL_ADAPTER_VERSION: VERSION,
|
|
17
|
+
parseWatermark,
|
|
18
|
+
formatWatermark,
|
|
19
|
+
EMAIL_PROVIDERS: PROVIDERS,
|
|
20
|
+
resolveEmailProvider: resolveProvider,
|
|
21
|
+
ImapSession,
|
|
22
|
+
ImapAuthFailedError,
|
|
23
|
+
ImapConnectionFailedError,
|
|
24
|
+
ImapMailboxNotFoundError,
|
|
25
|
+
parseRawEmail,
|
|
26
|
+
};
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Email provider presets — mainland-China-first.
|
|
3
|
+
*
|
|
4
|
+
* Each entry tells the adapter where to connect + which folders are
|
|
5
|
+
* worth syncing by default. Users can override host/port/tls via the
|
|
6
|
+
* adapter constructor (provider="custom").
|
|
7
|
+
*
|
|
8
|
+
* Authentication is consistently the per-provider "authorization code"
|
|
9
|
+
* pattern (per design doc §3 OQ-1). The user goes to their email
|
|
10
|
+
* web console, enables IMAP/SMTP, copies the auth code, pastes it
|
|
11
|
+
* into the adapter config. The adapter never sees the user's actual
|
|
12
|
+
* login password.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
"use strict";
|
|
16
|
+
|
|
17
|
+
// Built-in provider presets keyed by provider id. Only the outer table is
// frozen; each preset is a plain object.
const PROVIDERS = (() => {
  // Every built-in preset uses implicit TLS on port 993.
  const imaps = { port: 993, secure: true };

  const table = {
    qq: {
      id: "qq",
      displayName: "QQ 邮箱",
      host: "imap.qq.com",
      ...imaps,
      setupUrl: "https://mail.qq.com/cgi-bin/frame_html?sid=&r=&t=client",
      defaultFolders: ["INBOX", "Sent Messages"],
      authNote: "Use 授权码 (设置 → 账户 → 开启 IMAP/SMTP), NOT your QQ login password.",
    },
    "189": {
      id: "189",
      displayName: "189 邮箱",
      host: "imap.189.cn",
      ...imaps,
      setupUrl: "https://mail.189.cn/",
      defaultFolders: ["INBOX", "已发送"],
      authNote: "Use 授权码 (设置 → 第三方客户端授权码).",
    },
    "163": {
      id: "163",
      displayName: "网易邮箱 (163/126)",
      host: "imap.163.com",
      ...imaps,
      setupUrl: "https://mail.163.com/",
      defaultFolders: ["INBOX", "已发送"],
      authNote: "Use 授权码 (设置 → POP3/SMTP/IMAP).",
    },
    outlook: {
      id: "outlook",
      displayName: "Outlook / Hotmail",
      host: "outlook.office365.com",
      ...imaps,
      setupUrl: "https://outlook.live.com/mail/0/options/mail/forwarding",
      defaultFolders: ["INBOX", "Sent"],
      authNote: "App password (account.microsoft.com/security) — basic-auth deprecation pending; v1 will switch to OAuth2.",
    },
    gmail: {
      id: "gmail",
      displayName: "Gmail",
      host: "imap.gmail.com",
      ...imaps,
      setupUrl: "https://myaccount.google.com/apppasswords",
      defaultFolders: ["INBOX", "[Gmail]/Sent Mail"],
      authNote: "App password (myaccount.google.com/apppasswords). OAuth2 in v2.",
    },
  };

  return Object.freeze(table);
})();
|
|
69
|
+
|
|
70
|
+
/**
 * Resolve an account config into concrete connection settings.
 *
 * For `provider: "custom"` the account must supply `host`; port defaults to
 * 993, TLS is on unless `secure === false`, and folders default to ["INBOX"].
 * For any other provider the matching PROVIDERS preset supplies defaults that
 * individual account fields may override.
 *
 * The returned `folders` array is always a fresh copy, so callers can modify
 * it without touching the shared preset (or the caller's own account object).
 *
 * @param {object} account
 * @param {string} account.provider Preset id or "custom".
 * @param {string} [account.host]
 * @param {number} [account.port] Used only when it is an integer.
 * @param {boolean} [account.secure]
 * @param {string[]} [account.folders] Used only when it is a non-empty array.
 * @param {string} [account.displayName]
 * @returns {{host: string, port: number, secure: boolean, folders: string[],
 *   displayName: string, providerId: string}}
 * @throws {Error} When account is missing, a custom account has no host, or
 *   the provider id is not a known preset.
 */
function resolveProvider(account) {
  if (!account || typeof account !== "object") {
    throw new Error("resolveProvider: account required");
  }
  const id = account.provider;
  const hasCustomFolders = Array.isArray(account.folders) && account.folders.length > 0;

  if (id === "custom") {
    if (typeof account.host !== "string" || !account.host) {
      throw new Error("resolveProvider: custom provider requires host");
    }
    return {
      host: account.host,
      port: Number.isInteger(account.port) ? account.port : 993,
      secure: account.secure !== false,
      folders: hasCustomFolders ? [...account.folders] : ["INBOX"],
      displayName: account.displayName || account.host,
      providerId: "custom",
    };
  }

  // Own-property lookup only: a bare PROVIDERS[id] would also resolve
  // inherited members such as "constructor" or "toString" and let them
  // slip past the unknown-provider check.
  const preset = Object.prototype.hasOwnProperty.call(PROVIDERS, id) ? PROVIDERS[id] : undefined;
  if (!preset) {
    throw new Error(
      `resolveProvider: unknown provider "${id}". Known: ${Object.keys(PROVIDERS).join(", ")}, or use "custom".`
    );
  }
  return {
    host: account.host || preset.host,
    port: Number.isInteger(account.port) ? account.port : preset.port,
    secure: typeof account.secure === "boolean" ? account.secure : preset.secure,
    folders: hasCustomFolders ? [...account.folders] : [...preset.defaultFolders],
    displayName: account.displayName || preset.displayName,
    providerId: preset.id,
  };
}
|
|
107
|
+
|
|
108
|
+
module.exports = {
|
|
109
|
+
PROVIDERS,
|
|
110
|
+
resolveProvider,
|
|
111
|
+
};
|
package/lib/analysis.js
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AnalysisEngine — natural-language Q&A skeleton for Personal Data Hub.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors §8 of docs/design/Personal_Data_Hub_Architecture.md. The flow:
|
|
5
|
+
*
|
|
6
|
+
* ask(question) →
|
|
7
|
+
* 1. parseQuery(question) — time window + filters + intent
|
|
8
|
+
* 2. gatherFacts(parsed) — vault.queryEvents with filters
|
|
9
|
+
* + optional ragRetriever(question) for additional context
|
|
10
|
+
* 3. buildPrompt(question, facts) → messages
|
|
11
|
+
* 4. llm.chat(messages) → text
|
|
12
|
+
* 5. parseCitations(text) — extract bracketed ids
|
|
13
|
+
* 6. validateCitations(...) — known vs hallucinated
|
|
14
|
+
* 7. vault.audit(...) — record query + facts cited
|
|
15
|
+
* 8. return { answer, citations, facts, hallucinatedCitations, ... }
|
|
16
|
+
*
|
|
17
|
+
* Privacy invariant (§11.2): the engine refuses to call a non-local LLM
|
|
18
|
+
* unless the caller passes acceptNonLocal: true. This is a hard runtime
|
|
19
|
+
* gate — every layer downstream of the engine assumes locality.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
"use strict";
|
|
23
|
+
|
|
24
|
+
const { parseQuery } = require("./query-parser");
|
|
25
|
+
const {
|
|
26
|
+
buildPrompt,
|
|
27
|
+
parseCitations,
|
|
28
|
+
validateCitations,
|
|
29
|
+
DEFAULT_SYSTEM_PROMPT,
|
|
30
|
+
} = require("./prompt-builder");
|
|
31
|
+
const { toError } = require("./adapter-spec");
|
|
32
|
+
|
|
33
|
+
const DEFAULT_MAX_FACTS = 80;
|
|
34
|
+
const DEFAULT_MAX_QUERY_LIMIT = 200;
|
|
35
|
+
|
|
36
|
+
/**
 * Natural-language Q&A over the vault: parse the question, gather matching
 * events, build a prompt, call the LLM, then validate the ids it cited.
 */
class AnalysisEngine {
  /**
   * @param {object} opts
   * @param {import("./vault").LocalVault} opts.vault
   * @param {{chat: Function, isLocal: boolean, name?: string}} opts.llm
   * @param {(question: string, parsed: object) => Promise<Array<{id?: string, text: string, metadata?: {id?: string}}>>} [opts.ragRetriever]
   *   Each returned doc is matched to a vault event via `doc.id` or `doc.metadata.id`.
   * @param {number} [opts.maxFacts=80]
   * @param {number} [opts.maxQueryLimit=200]
   * @param {string} [opts.systemPrompt]
   * @throws {Error} When opts, opts.vault, opts.llm.chat or a boolean opts.llm.isLocal is missing.
   */
  constructor(opts) {
    if (!opts || typeof opts !== "object") throw new Error("AnalysisEngine: opts required");
    if (!opts.vault) throw new Error("AnalysisEngine: opts.vault required");
    if (!opts.llm || typeof opts.llm.chat !== "function") {
      throw new Error("AnalysisEngine: opts.llm with .chat() required");
    }
    if (typeof opts.llm.isLocal !== "boolean") {
      throw new Error("AnalysisEngine: opts.llm.isLocal must be declared (true/false)");
    }

    this.vault = opts.vault;
    this.llm = opts.llm;
    this.ragRetriever = typeof opts.ragRetriever === "function" ? opts.ragRetriever : null;
    this.maxFacts = Number.isInteger(opts.maxFacts) && opts.maxFacts > 0 ? opts.maxFacts : DEFAULT_MAX_FACTS;
    this.maxQueryLimit =
      Number.isInteger(opts.maxQueryLimit) && opts.maxQueryLimit > 0
        ? opts.maxQueryLimit
        : DEFAULT_MAX_QUERY_LIMIT;
    this.systemPrompt = opts.systemPrompt || DEFAULT_SYSTEM_PROMPT;
  }

  /**
   * Ask a natural-language question.
   *
   * @param {string} question
   * @param {object} [options]
   * @param {boolean} [options.acceptNonLocal=false] required true for cloud LLMs
   * @param {number} [options.now]
   * @param {boolean} [options.skipAudit=false]
   * @returns {Promise<AskResult>}
   * @throws {Error} When the question is empty, when a non-local LLM is used
   *   without opting in, or when the LLM call fails (after auditing the failure).
   *
   * @typedef {object} AskResult
   * @property {string} answer
   * @property {string[]} citations             event ids cited AND known
   * @property {string[]} hallucinatedCitations event ids cited but not in facts
   * @property {Array<object>} facts            facts handed to the LLM
   * @property {string[]} ragContextIds         ids of facts added by the RAG retriever
   * @property {object} parsed                  parseQuery output
   * @property {object} usage                   { promptTokens, completionTokens, totalTokens }
   * @property {string} model
   * @property {number} durationMs
   * @property {string|null} warning            "no-facts" | "hallucinated-citations" | null
   */
  async ask(question, options = {}) {
    if (typeof question !== "string" || question.length === 0) {
      throw new Error("AnalysisEngine.ask: question must be a non-empty string");
    }
    if (!this.llm.isLocal && !options.acceptNonLocal) {
      throw new Error(
        "AnalysisEngine.ask: LLM declared non-local; pass acceptNonLocal: true to opt in. " +
          "(Personal Data Hub default policy: all inference stays on-device.)"
      );
    }

    const startedAt = Date.now();
    const parsed = parseQuery(question, { now: options.now });

    // Gather facts from the vault.
    const facts = this._gatherFacts(parsed);

    // Optional RAG augmentation.
    const ragContext = [];
    if (this.ragRetriever) {
      try {
        const docs = await this.ragRetriever(question, parsed);
        if (Array.isArray(docs)) {
          // Track ids already present so each retrieved doc is an O(1) dedupe
          // check instead of a scan over all facts.
          const seenIds = new Set(facts.map((f) => f.id));
          for (const doc of docs) {
            // The retriever contract puts the event id under metadata.id;
            // a top-level id is accepted too for backward compatibility.
            const docId = doc && (doc.id || (doc.metadata && doc.metadata.id));
            if (!docId) continue;
            const e = this.vault.getEvent(docId);
            if (e && !seenIds.has(e.id)) {
              seenIds.add(e.id);
              facts.push(e);
              ragContext.push(docId);
            }
          }
        }
      } catch (err) {
        // RAG failure shouldn't abort Q&A — log and continue with direct facts.
        const e = toError(err, "ragRetriever");
        try {
          this.vault.audit("analysis.rag_failed", question, { error: e.message });
        } catch (_e) {
          // Auditing is best-effort; an audit failure must not mask the flow.
        }
      }
    }

    // Build prompt.
    const { messages, factIds, factCount, truncated } = buildPrompt({
      question,
      facts,
      systemPrompt: this.systemPrompt,
      intent: parsed.intent,
      timeWindow: parsed.timeWindow,
      maxFacts: this.maxFacts,
    });

    // Call LLM.
    let llmResp;
    try {
      llmResp = await this.llm.chat(messages, {
        temperature: 0.2,
        purpose: "personal-data-hub.analysis.ask",
      });
    } catch (err) {
      const e = toError(err, "llm.chat");
      try {
        this.vault.audit("analysis.llm_failed", question, { error: e.message });
      } catch (_e) {
        // Best-effort audit; the LLM error below is what the caller needs.
      }
      throw e;
    }

    const answer = (llmResp && typeof llmResp.text === "string") ? llmResp.text : "";

    // Parse + validate citations.
    const cited = parseCitations(answer);
    const { known, unknown } = validateCitations(cited, factIds);

    // Warnings.
    let warning = null;
    if (factCount === 0) warning = "no-facts";
    else if (unknown.length > 0) warning = "hallucinated-citations";

    const durationMs = Date.now() - startedAt;
    // Guarded like `answer` above: a null/undefined LLM response yields empty usage.
    const usage = (llmResp && llmResp.usage) || {};

    if (!options.skipAudit) {
      try {
        this.vault.audit("analysis.ask", question, {
          factCount,
          truncated,
          citationsKnown: known.length,
          citationsUnknown: unknown.length,
          warning,
          durationMs,
          model: this.llm.name || (llmResp && llmResp.model),
        });
      } catch (_e) {
        // Best-effort audit; never fail a successful answer because of it.
      }
    }

    return {
      answer,
      citations: known,
      hallucinatedCitations: unknown,
      facts,
      ragContextIds: ragContext,
      parsed,
      usage,
      model: this.llm.name || (llmResp && llmResp.model) || "unknown",
      durationMs,
      warning,
    };
  }

  // ─── Internals ─────────────────────────────────────────────────────

  /**
   * Query the vault for events matching the parsed adapter filter and time window.
   *
   * @param {object} parsed parseQuery output.
   * @returns {*} Whatever `vault.queryEvents` returns (used as an array of events by ask()).
   */
  _gatherFacts(parsed) {
    // Deliberately do NOT pass parsed.filters.subtype as a vault filter:
    // the keyword heuristic (`order` vs `payment` vs `transfer`) is too
    // crude to reliably narrow without false negatives. E.g. a user
    // asking "在淘宝花了多少" wants taobao-adapter ORDER events; the
    // keyword parser picks `payment` and would over-filter to zero rows.
    // Instead we filter by adapter + time window (both reliable) and
    // pass the subtype/intent into the prompt as a HINT for the LLM to
    // apply on prose. The LLM is good at filtering; SQL keyword guessing
    // is brittle.
    const q = {
      limit: this.maxQueryLimit,
    };
    if (parsed.filters && parsed.filters.adapter) q.adapter = parsed.filters.adapter;
    if (parsed.timeWindow) {
      if (Number.isFinite(parsed.timeWindow.since)) q.since = parsed.timeWindow.since;
      if (Number.isFinite(parsed.timeWindow.until)) q.until = parsed.timeWindow.until;
    }
    return this.vault.queryEvents(q);
  }
}
|
|
221
|
+
|
|
222
|
+
module.exports = {
|
|
223
|
+
AnalysisEngine,
|
|
224
|
+
DEFAULT_MAX_FACTS,
|
|
225
|
+
DEFAULT_MAX_QUERY_LIMIT,
|
|
226
|
+
};
|