@chainlesschain/personal-data-hub 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +241 -0
- package/__tests__/adapter-spec.test.js +78 -0
- package/__tests__/adapters/email-adapter.test.js +605 -0
- package/__tests__/adapters/email-imap-session.test.js +334 -0
- package/__tests__/adapters/email-parser.test.js +244 -0
- package/__tests__/adapters/email-providers.test.js +84 -0
- package/__tests__/analysis.test.js +302 -0
- package/__tests__/batch.test.js +133 -0
- package/__tests__/bridges-cc-kg.test.js +231 -0
- package/__tests__/bridges-cc-llm.test.js +191 -0
- package/__tests__/bridges-cc-rag.test.js +162 -0
- package/__tests__/ids.test.js +45 -0
- package/__tests__/key-providers.test.js +126 -0
- package/__tests__/kg-derive.test.js +219 -0
- package/__tests__/llm-client.test.js +122 -0
- package/__tests__/mock-adapter.test.js +93 -0
- package/__tests__/prompt-builder.test.js +204 -0
- package/__tests__/query-parser.test.js +150 -0
- package/__tests__/rag-derive.test.js +169 -0
- package/__tests__/registry.test.js +304 -0
- package/__tests__/schemas.test.js +331 -0
- package/__tests__/vault.test.js +506 -0
- package/lib/adapter-spec.js +155 -0
- package/lib/adapters/email-imap/email-adapter.js +398 -0
- package/lib/adapters/email-imap/email-parser.js +177 -0
- package/lib/adapters/email-imap/imap-session.js +294 -0
- package/lib/adapters/email-imap/index.js +26 -0
- package/lib/adapters/email-imap/providers.js +111 -0
- package/lib/analysis.js +226 -0
- package/lib/batch.js +123 -0
- package/lib/bridges/cc-kg-sink.js +264 -0
- package/lib/bridges/cc-llm-adapter.js +169 -0
- package/lib/bridges/cc-rag-sink.js +118 -0
- package/lib/bridges/index.js +44 -0
- package/lib/constants.js +92 -0
- package/lib/ids.js +103 -0
- package/lib/index.js +141 -0
- package/lib/key-providers.js +146 -0
- package/lib/kg-derive.js +214 -0
- package/lib/llm-client.js +171 -0
- package/lib/migrations.js +246 -0
- package/lib/mock-adapter.js +199 -0
- package/lib/prompt-builder.js +205 -0
- package/lib/query-parser.js +250 -0
- package/lib/rag-derive.js +186 -0
- package/lib/registry.js +398 -0
- package/lib/schemas.js +379 -0
- package/lib/vault.js +883 -0
- package/package.json +63 -0
- package/vitest.config.js +10 -0
package/lib/kg-derive.js
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* UnifiedSchema → Knowledge Graph triples.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors §5.3 of docs/design/Personal_Data_Hub_Architecture.md:
|
|
5
|
+
* Event(<id>) --occurred-at--> Time(<ts>)
|
|
6
|
+
* Event(<id>) --happened-at--> Place(<id>)
|
|
7
|
+
* Event(<id>) --involves--> Person(<id>) (one per participant)
|
|
8
|
+
* Event(<id>) --by--> Person(<actor>)
|
|
9
|
+
* Event(<id>) --about--> Item(<id>)
|
|
10
|
+
* Event(<id>) --topic--> Topic(<id>)
|
|
11
|
+
* Event(<id>) --subtype--> "<subtype>"
|
|
12
|
+
* Event(<id>) --source--> "<adapter>"
|
|
13
|
+
*
|
|
14
|
+
* Plus for entities themselves so the KG knows they exist:
|
|
15
|
+
* Person(<id>) --rdf:type--> "person"
|
|
16
|
+
* Person(<id>) --has-name--> "<name>" (one per name)
|
|
17
|
+
* Place(<id>) --located-at--> "lat,lng" (if coordinates)
|
|
18
|
+
* Item(<id>) --priced-at--> "<value> <currency>" (if priced)
|
|
19
|
+
* Topic(<id>) --parent--> Topic(<parentId>) (if parented)
|
|
20
|
+
*
|
|
21
|
+
* The hub does NOT depend on a specific KG engine. The registry pipes
|
|
22
|
+
* these triples to a pluggable `kgSink({ subject, predicate, object,
|
|
23
|
+
* literal })` callback that the desktop wiring binds to ChainlessChain's
|
|
24
|
+
* existing KG layer (or to /dev/null in tests).
|
|
25
|
+
*
|
|
26
|
+
* Triples are plain JS objects so they can serialize to JSON for IPC, KG
|
|
27
|
+
* persistence, or human inspection.
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
"use strict";
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* @typedef {object} Triple
|
|
34
|
+
* @property {string} subject entity id (typed by namespace prefix unless raw)
|
|
35
|
+
* @property {string} predicate
|
|
36
|
+
* @property {string} [object] another entity id (for entity-to-entity edges)
|
|
37
|
+
* @property {string|number|null} [literal] primitive value (for leaf edges)
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
/**
 * Build a single knowledge-graph triple.
 *
 * `opts.object` (an entity id, stringified) wins over `opts.literal`. A value
 * that is null or undefined is skipped, so the returned triple may carry
 * neither `object` nor `literal`.
 *
 * @param {string} subject
 * @param {string} predicate
 * @param {{ object?: *, literal?: * }} [opts]
 * @returns {Triple}
 */
function triple(subject, predicate, opts) {
  const edge = { subject, predicate };
  if (!opts) return edge;

  const target = opts.object;
  if (target != null) {
    edge.object = String(target);
    return edge;
  }

  const value = opts.literal;
  if (value != null) edge.literal = value;
  return edge;
}
|
|
50
|
+
|
|
51
|
+
// ─── Per-entity derivers ────────────────────────────────────────────────
|
|
52
|
+
|
|
53
|
+
/**
 * Derive the KG triples describing one Event entity.
 *
 * `rdf:type`, `subtype` and `occurred-at` are always pushed; the remaining
 * predicates are pushed only when the corresponding field is present
 * (truthy scalar, or an actual array for the multi-valued references).
 *
 * @param {object} event UnifiedSchema event
 * @returns {Triple[]} triples in a fixed predicate order
 */
function deriveEventTriples(event) {
  const subject = event.id;
  const edges = [
    triple(subject, "rdf:type", { literal: "event" }),
    triple(subject, "subtype", { literal: event.subtype }),
    triple(subject, "occurred-at", { literal: event.occurredAt }),
  ];

  const addLiteral = (predicate, literal) => {
    edges.push(triple(subject, predicate, { literal }));
  };
  const addEdge = (predicate, object) => {
    edges.push(triple(subject, predicate, { object }));
  };
  // Multi-valued references: one edge per array entry, nothing for non-arrays.
  const addEdges = (predicate, refs) => {
    if (!Array.isArray(refs)) return;
    for (const ref of refs) addEdge(predicate, ref);
  };

  const adapter = event.source && event.source.adapter;
  if (adapter) addLiteral("source", adapter);

  if (event.actor) addEdge("by", event.actor);
  addEdges("involves", event.participants);
  if (event.place) addEdge("happened-at", event.place);
  addEdges("about", event.items);
  addEdges("topic", event.topics);

  const amount = event.content && event.content.amount;
  if (amount) {
    addLiteral("amount-value", amount.value);
    addLiteral("amount-currency", amount.currency);
    addLiteral("amount-direction", amount.direction);
  }

  return edges;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Derive the KG triples describing one Person entity.
 *
 * Pushes `rdf:type` and `subtype`, one `has-name` per entry of `names`,
 * one `id:<kind>` per identifier value (array-valued identifiers fan out),
 * and `relation` when set.
 *
 * A person whose `names` is missing or not an array yields no `has-name`
 * triples instead of throwing, matching the `Array.isArray` guards used by
 * the event and topic derivers so one malformed row cannot abort a batch.
 *
 * @param {object} person UnifiedSchema person
 * @returns {Triple[]}
 */
function derivePersonTriples(person) {
  const out = [];
  out.push(triple(person.id, "rdf:type", { literal: "person" }));
  out.push(triple(person.id, "subtype", { literal: person.subtype }));

  if (Array.isArray(person.names)) {
    for (const n of person.names) {
      out.push(triple(person.id, "has-name", { literal: n }));
    }
  }

  if (person.identifiers) {
    for (const [kind, val] of Object.entries(person.identifiers)) {
      // Null/undefined identifiers carry no information; skip them.
      if (val == null) continue;
      const values = Array.isArray(val) ? val : [val];
      for (const v of values) {
        out.push(triple(person.id, `id:${kind}`, { literal: v }));
      }
    }
  }

  if (person.relation) {
    out.push(triple(person.id, "relation", { literal: person.relation }));
  }
  return out;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Derive the KG triples describing one Place entity.
 *
 * Pushes `rdf:type` and `has-name`, one `has-alias` per alias that differs
 * from the primary name, and `located-at` ("lat,lng"), `address` and
 * `category` when those fields are set.
 *
 * A place whose `aliases` is missing or not an array yields no `has-alias`
 * triples instead of throwing, matching the `Array.isArray` guards used by
 * the event and topic derivers so one malformed row cannot abort a batch.
 *
 * @param {object} place UnifiedSchema place
 * @returns {Triple[]}
 */
function derivePlaceTriples(place) {
  const out = [];
  out.push(triple(place.id, "rdf:type", { literal: "place" }));
  out.push(triple(place.id, "has-name", { literal: place.name }));

  if (Array.isArray(place.aliases)) {
    for (const a of place.aliases) {
      // The primary name is already emitted as has-name; don't repeat it.
      if (a !== place.name) out.push(triple(place.id, "has-alias", { literal: a }));
    }
  }

  if (place.coordinates) {
    out.push(triple(place.id, "located-at", { literal: `${place.coordinates.lat},${place.coordinates.lng}` }));
  }
  if (place.address) {
    out.push(triple(place.id, "address", { literal: place.address }));
  }
  if (place.category) {
    out.push(triple(place.id, "category", { literal: place.category }));
  }
  return out;
}
|
|
134
|
+
|
|
135
|
+
/**
 * Derive the KG triples describing one Item entity.
 *
 * `rdf:type`, `subtype` and `has-name` are always pushed; `category`,
 * `priced-at` ("<value> <currency>") and `sold-by` only when the matching
 * field is truthy.
 *
 * @param {object} item UnifiedSchema item
 * @returns {Triple[]}
 */
function deriveItemTriples(item) {
  const subject = item.id;
  const leaf = (predicate, literal) => triple(subject, predicate, { literal });

  const edges = [
    leaf("rdf:type", "item"),
    leaf("subtype", item.subtype),
    leaf("has-name", item.name),
  ];

  if (item.category) {
    edges.push(leaf("category", item.category));
  }
  if (item.price) {
    edges.push(leaf("priced-at", `${item.price.value} ${item.price.currency}`));
  }
  if (item.merchant) {
    // The merchant is another entity, so this is an entity-to-entity edge.
    edges.push(triple(subject, "sold-by", { object: item.merchant }));
  }

  return edges;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Derive the KG triples describing one Topic entity.
 *
 * `rdf:type` and `has-name` are always pushed; `parent` when `parentTopic`
 * is truthy; one `derived-from` edge per entry when `derivedFromEvents`
 * is an array.
 *
 * @param {object} topic UnifiedSchema topic
 * @returns {Triple[]}
 */
function deriveTopicTriples(topic) {
  const subject = topic.id;
  const edges = [
    triple(subject, "rdf:type", { literal: "topic" }),
    triple(subject, "has-name", { literal: topic.name }),
  ];

  if (topic.parentTopic) {
    edges.push(triple(subject, "parent", { object: topic.parentTopic }));
  }

  const origins = topic.derivedFromEvents;
  if (!Array.isArray(origins)) return edges;

  for (const eventId of origins) {
    edges.push(triple(subject, "derived-from", { object: eventId }));
  }
  return edges;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Flatten a whole NormalizedBatch into KG triples with a single call.
 *
 * Collections are walked in the fixed order events → persons → places →
 * items → topics, and entities keep their position within each collection.
 * A missing collection is treated as empty; a non-object batch yields [].
 *
 * @param {object} batch
 * @returns {Triple[]}
 */
function deriveBatchTriples(batch) {
  if (batch === null || typeof batch !== "object") return [];

  const plan = [
    [batch.events, deriveEventTriples],
    [batch.persons, derivePersonTriples],
    [batch.places, derivePlaceTriples],
    [batch.items, deriveItemTriples],
    [batch.topics, deriveTopicTriples],
  ];

  const collected = [];
  for (const [entities, derive] of plan) {
    for (const entity of entities || []) {
      collected.push(...derive(entity));
    }
  }
  return collected;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Route one entity to the deriver matching its `.type`.
 *
 * Unknown types and non-object inputs produce [] rather than an exception:
 * the registry calls this in hot loops, and a single odd row should not
 * take the whole batch down.
 *
 * @param {object} entity
 * @returns {Triple[]}
 */
function deriveEntityTriples(entity) {
  if (entity === null || typeof entity !== "object") return [];

  const kind = entity.type;
  if (kind === "event") return deriveEventTriples(entity);
  if (kind === "person") return derivePersonTriples(entity);
  if (kind === "place") return derivePlaceTriples(entity);
  if (kind === "item") return deriveItemTriples(entity);
  if (kind === "topic") return deriveTopicTriples(entity);
  return [];
}
|
|
204
|
+
|
|
205
|
+
// Public surface: the low-level `triple` builder, one deriver per entity
// type, and the batch / single-entity entry points.
module.exports = {
  triple,
  deriveEventTriples,
  derivePersonTriples,
  derivePlaceTriples,
  deriveItemTriples,
  deriveTopicTriples,
  deriveBatchTriples,
  deriveEntityTriples,
};
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLMClient — pluggable LLM backend interface used by the AnalysisEngine.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors §8.4 of docs/design/Personal_Data_Hub_Architecture.md. The hub
|
|
5
|
+
* doesn't pick an LLM SDK; consumers inject one of:
|
|
6
|
+
*
|
|
7
|
+
* - OllamaClient — HTTP, local-only, default for the privacy promise
|
|
8
|
+
* - MockLLMClient — deterministic, for tests and skill development
|
|
9
|
+
* - (later) vLLM / Llama.cpp / cloud — explicit user opt-in only
|
|
10
|
+
*
|
|
11
|
+
* Contract: a single async chat(messages, opts) -> { text, usage?, model? }.
|
|
12
|
+
* Messages are { role: "system"|"user"|"assistant", content: string }.
|
|
13
|
+
*
|
|
14
|
+
* `isLocal` MUST accurately report whether the backend keeps data on the
|
|
15
|
+
* user's machine. The AnalysisEngine refuses to run with a
|
|
16
|
+
* non-local LLM unless `acceptNonLocal: true` is explicitly
|
|
17
|
+
* passed by the caller. Architecture-doc §11.2 invariant.
|
|
18
|
+
*
|
|
19
|
+
* `name` human-readable label surfaced in audit logs.
|
|
20
|
+
*
|
|
21
|
+
* Implementations should treat the messages array as opaque — the hub
|
|
22
|
+
* stitches together (system prompt) + (user question with embedded facts)
|
|
23
|
+
* and hands them off. No silent rewriting.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
"use strict";
|
|
27
|
+
|
|
28
|
+
// ─── MockLLMClient ───────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
/**
 * MockLLMClient — deterministic, no network.
 *
 * Constructor takes one of:
 *   { reply: string }               — always returns the same text
 *   { reply: (messages) => string } — function of input
 *   { replies: string[] }           — returns one per call, errors after exhaustion
 *
 * With none of these, every call answers "(mock empty reply)". An optional
 * `name` overrides the default "mock-llm" label, which is also reported as
 * the `model` of each reply.
 *
 * Useful for testing AnalysisEngine end-to-end without an Ollama install.
 */
class MockLLMClient {
  /**
   * @param {{ name?: string, reply?: string|Function, replies?: string[] }} [opts]
   */
  constructor(opts = {}) {
    this.name = opts.name || "mock-llm";
    this.isLocal = true;
    this.calls = []; // each call recorded as { messages, opts } for asserting prompt shape

    if (typeof opts.reply === "function") {
      this._reply = opts.reply;
    } else if (typeof opts.reply === "string") {
      this._reply = () => opts.reply;
    } else if (Array.isArray(opts.replies)) {
      let i = 0;
      this._reply = () => {
        if (i >= opts.replies.length) {
          throw new Error(`MockLLMClient: exhausted replies (${opts.replies.length} provided)`);
        }
        return opts.replies[i++];
      };
    } else {
      this._reply = () => "(mock empty reply)";
    }
  }

  /**
   * Record the call and produce the configured reply.
   *
   * Token counts are a rough chars/4 estimate, truncated to an integer;
   * `totalTokens` is the sum of the prompt and completion estimates.
   *
   * @param {{ role: string, content: string }[]} messages
   * @param {object} [opts] recorded verbatim (shallow copy) in `calls`
   * @returns {Promise<{ text: string, model: string, usage: { promptTokens: number, completionTokens: number, totalTokens: number } }>}
   */
  async chat(messages, opts = {}) {
    // Shallow copies so later mutation by the caller doesn't rewrite history.
    this.calls.push({ messages: messages.slice(), opts: { ...opts } });
    const text = this._reply(messages, opts);

    const promptChars = messages.reduce((n, m) => n + (m.content ? m.content.length : 0), 0);
    const promptTokens = (promptChars / 4) | 0;
    const completionTokens = ((text || "").length / 4) | 0;

    return {
      text,
      model: this.name,
      usage: {
        promptTokens,
        completionTokens,
        totalTokens: promptTokens + completionTokens,
      },
    };
  }
}
|
|
77
|
+
|
|
78
|
+
// ─── OllamaClient ────────────────────────────────────────────────────────
|
|
79
|
+
|
|
80
|
+
/**
 * True when `baseUrl` points at this machine's loopback interface
 * (`localhost`, `*.localhost`, 127.0.0.0/8 or `::1`). Unparseable URLs are
 * reported as non-local so the privacy check fails closed.
 */
function isLoopbackUrl(baseUrl) {
  let hostname;
  try {
    hostname = new URL(baseUrl).hostname.toLowerCase();
  } catch {
    return false;
  }
  return (
    hostname === "localhost" ||
    hostname.endsWith(".localhost") ||
    hostname === "[::1]" ||
    /^127(?:\.\d{1,3}){3}$/.test(hostname)
  );
}

/**
 * OllamaClient — talks to an Ollama HTTP server.
 *
 * Default endpoint http://localhost:11434, default model qwen2.5:7b-instruct
 * (per §8.4 recommendation). Uses /api/chat which expects:
 *
 *   POST /api/chat
 *   { "model": "...", "messages": [...], "stream": false, "options": { ... } }
 *
 *   Response: { message: { role: "assistant", content: "..." }, ... }
 *
 * No external dep — uses global fetch unless `opts.fetch` is injected. If a
 * request fails (network error / Ollama not running / model not pulled) the
 * error surfaces to the caller with `cause` preserved, never silently
 * downgrades to cloud.
 *
 * `isLocal` is derived from `baseUrl`: it is true only for loopback hosts,
 * so pointing the client at a remote Ollama server is reported as non-local.
 */
class OllamaClient {
  /**
   * @param {object} [opts]
   * @param {string} [opts.baseUrl] server root; one trailing slash is stripped
   * @param {string} [opts.model]
   * @param {string} [opts.name] label, defaults to `ollama:<model>`
   * @param {number} [opts.timeoutMs] per-chat timeout, default 60 s
   * @param {Function} [opts.fetch] fetch implementation, defaults to global fetch
   * @throws {Error} when no fetch implementation is available
   */
  constructor(opts = {}) {
    this.baseUrl = (opts.baseUrl || "http://localhost:11434").replace(/\/$/, "");
    this.model = opts.model || "qwen2.5:7b-instruct";
    this.name = opts.name || `ollama:${this.model}`;
    // Data only stays on the user's machine when the server is on loopback.
    this.isLocal = isLoopbackUrl(this.baseUrl);
    this.timeoutMs = Number.isFinite(opts.timeoutMs) ? opts.timeoutMs : 60_000;
    this._fetch = opts.fetch || (typeof fetch !== "undefined" ? fetch : null);
    if (!this._fetch) {
      throw new Error("OllamaClient: no fetch available. Node 22+ required, or pass opts.fetch.");
    }
  }

  /**
   * Send one non-streaming chat request.
   *
   * @param {{ role: string, content: string }[]} messages forwarded unchanged
   * @param {{ temperature?: number, numCtx?: number }} [opts]
   * @returns {Promise<{ text: string, model: string, usage: object, raw: object }>}
   * @throws {Error} on transport failure (original error in `cause`) or a
   *   non-2xx response (status and first 200 chars of the body in the message)
   */
  async chat(messages, opts = {}) {
    const ctrl = new AbortController();
    const timer = setTimeout(() => ctrl.abort(), this.timeoutMs);
    // Call fetch detached from `this`: a browser-style fetch rejects a
    // foreign receiver with "Illegal invocation".
    const send = this._fetch;

    try {
      let resp;
      try {
        resp = await send(`${this.baseUrl}/api/chat`, {
          method: "POST",
          headers: { "content-type": "application/json" },
          body: JSON.stringify({
            model: this.model,
            messages,
            stream: false,
            options: {
              temperature: typeof opts.temperature === "number" ? opts.temperature : 0.2,
              ...(opts.numCtx ? { num_ctx: opts.numCtx } : {}),
            },
          }),
          signal: ctrl.signal,
        });
      } catch (err) {
        throw new Error(
          `OllamaClient.chat: request failed — ${err && err.message ? err.message : err}`,
          { cause: err },
        );
      }

      if (!resp.ok) {
        const body = await resp.text().catch(() => "");
        throw new Error(`OllamaClient.chat: HTTP ${resp.status} ${resp.statusText} — ${body.slice(0, 200)}`);
      }

      const json = await resp.json();
      const text = json && json.message && typeof json.message.content === "string"
        ? json.message.content
        : "";
      const promptTokens = json?.prompt_eval_count || 0;
      const completionTokens = json?.eval_count || 0;
      return {
        text,
        model: this.model,
        usage: {
          promptTokens,
          completionTokens,
          totalTokens: promptTokens + completionTokens,
        },
        raw: json,
      };
    } finally {
      // Runs on every exit path, so the timeout also bounds the body read
      // and never outlives the call.
      clearTimeout(timer);
    }
  }

  /**
   * Lightweight health check — pings /api/tags with a 5 s timeout.
   * Never throws; failures are reported as `{ ok: false, error }`.
   *
   * @returns {Promise<{ ok: boolean, status?: number, error?: string }>}
   */
  async health() {
    const ctrl = new AbortController();
    const timer = setTimeout(() => ctrl.abort(), 5000);
    const send = this._fetch;
    try {
      const resp = await send(`${this.baseUrl}/api/tags`, { signal: ctrl.signal });
      return { ok: resp.ok, status: resp.status };
    } catch (err) {
      return { ok: false, error: err && err.message ? err.message : String(err) };
    } finally {
      // Without this a rejected fetch left the timer pending for 5 s.
      clearTimeout(timer);
    }
  }
}
|
|
170
|
+
|
|
171
|
+
// Public surface: the two LLM client implementations defined in this file.
module.exports = { MockLLMClient, OllamaClient };
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LocalVault schema migrations.
|
|
3
|
+
*
|
|
4
|
+
* Each migration is a `{ version, up(db) }` record. Applied in order, idempotent.
|
|
5
|
+
* The current version is recorded in the `_meta` table so re-opening a vault
|
|
6
|
+
* runs only the missing migrations.
|
|
7
|
+
*
|
|
8
|
+
* Design notes:
|
|
9
|
+
* - Each UnifiedSchema entity type gets its own table. Cross-entity references
|
|
10
|
+
* (Event.actor → Person.id, Event.topics → Topic.id[]) are *string IDs*,
|
|
11
|
+
* not FKs — adapters often produce entities out of order (event refs a
|
|
12
|
+
* person we'll insert next batch), so FKs would force two-pass ingest.
|
|
13
|
+
* - JSON columns (participants, items, topics, identifiers, content, extra)
|
|
14
|
+
* keep the schemaless tail intact. SQLite's json_extract enables indexed
|
|
15
|
+
* queries on common fields without a rigid schema upgrade per adapter.
|
|
16
|
+
* - source.adapter + source.originalId are copied into dedicated indexed
|
|
17
|
+
* columns for fast (adapter, originalId) dedup lookups during sync.
|
|
18
|
+
* - WAL mode is set at vault open, not here, so it survives re-opens.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
"use strict";
|
|
22
|
+
|
|
23
|
+
// Version-1 schema as an ordered list of SQL statements. Every statement is
// written with IF NOT EXISTS. Each entity table carries the same trailing
// provenance columns: source_adapter, source_original_id, source, extra,
// ingested_at, confidence.
const INITIAL_DDL = [
  // ── _meta: schema version + vault-level state ───────────────────────────
  `CREATE TABLE IF NOT EXISTS _meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL,
    updated_at INTEGER NOT NULL
  )`,

  // ── events ──────────────────────────────────────────────────────────────
  `CREATE TABLE IF NOT EXISTS events (
    id TEXT PRIMARY KEY,
    subtype TEXT NOT NULL,
    occurred_at INTEGER NOT NULL,
    duration_ms INTEGER,
    actor TEXT,
    participants TEXT,
    place TEXT,
    items TEXT,
    topics TEXT,
    content TEXT NOT NULL,
    source_adapter TEXT NOT NULL,
    source_original_id TEXT,
    source TEXT NOT NULL,
    extra TEXT,
    ingested_at INTEGER NOT NULL,
    confidence REAL
  )`,
  `CREATE INDEX IF NOT EXISTS idx_events_subtype ON events(subtype)`,
  `CREATE INDEX IF NOT EXISTS idx_events_occurred ON events(occurred_at)`,
  `CREATE INDEX IF NOT EXISTS idx_events_actor ON events(actor)`,
  `CREATE INDEX IF NOT EXISTS idx_events_place ON events(place)`,
  `CREATE INDEX IF NOT EXISTS idx_events_adapter ON events(source_adapter)`,
  // Partial unique index: only rows that have an original id are constrained
  // to be unique per (adapter, original id).
  `CREATE UNIQUE INDEX IF NOT EXISTS uniq_events_source ON events(source_adapter, source_original_id)
    WHERE source_original_id IS NOT NULL`,

  // ── persons ─────────────────────────────────────────────────────────────
  `CREATE TABLE IF NOT EXISTS persons (
    id TEXT PRIMARY KEY,
    subtype TEXT NOT NULL,
    names TEXT NOT NULL,
    identifiers TEXT,
    relation TEXT,
    notes TEXT,
    source_adapter TEXT NOT NULL,
    source_original_id TEXT,
    source TEXT NOT NULL,
    extra TEXT,
    ingested_at INTEGER NOT NULL,
    confidence REAL
  )`,
  `CREATE INDEX IF NOT EXISTS idx_persons_subtype ON persons(subtype)`,
  `CREATE INDEX IF NOT EXISTS idx_persons_adapter ON persons(source_adapter)`,
  `CREATE UNIQUE INDEX IF NOT EXISTS uniq_persons_source ON persons(source_adapter, source_original_id)
    WHERE source_original_id IS NOT NULL`,

  // ── places ──────────────────────────────────────────────────────────────
  `CREATE TABLE IF NOT EXISTS places (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    coordinates_lat REAL,
    coordinates_lng REAL,
    address TEXT,
    category TEXT,
    aliases TEXT NOT NULL,
    source_adapter TEXT NOT NULL,
    source_original_id TEXT,
    source TEXT NOT NULL,
    extra TEXT,
    ingested_at INTEGER NOT NULL,
    confidence REAL
  )`,
  `CREATE INDEX IF NOT EXISTS idx_places_name ON places(name)`,
  `CREATE INDEX IF NOT EXISTS idx_places_category ON places(category)`,
  `CREATE UNIQUE INDEX IF NOT EXISTS uniq_places_source ON places(source_adapter, source_original_id)
    WHERE source_original_id IS NOT NULL`,

  // ── items ───────────────────────────────────────────────────────────────
  `CREATE TABLE IF NOT EXISTS items (
    id TEXT PRIMARY KEY,
    subtype TEXT NOT NULL,
    name TEXT NOT NULL,
    category TEXT,
    price_value REAL,
    price_currency TEXT,
    merchant TEXT,
    external_url TEXT,
    thumbnail_local_path TEXT,
    source_adapter TEXT NOT NULL,
    source_original_id TEXT,
    source TEXT NOT NULL,
    extra TEXT,
    ingested_at INTEGER NOT NULL,
    confidence REAL
  )`,
  `CREATE INDEX IF NOT EXISTS idx_items_subtype ON items(subtype)`,
  `CREATE INDEX IF NOT EXISTS idx_items_merchant ON items(merchant)`,
  `CREATE UNIQUE INDEX IF NOT EXISTS uniq_items_source ON items(source_adapter, source_original_id)
    WHERE source_original_id IS NOT NULL`,

  // ── topics ──────────────────────────────────────────────────────────────
  // NOTE(review): unlike the other entity tables, topics has no
  // uniq_*_source index — confirm this is intentional.
  `CREATE TABLE IF NOT EXISTS topics (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    parent_topic TEXT,
    derived_from_events TEXT,
    source_adapter TEXT NOT NULL,
    source_original_id TEXT,
    source TEXT NOT NULL,
    extra TEXT,
    ingested_at INTEGER NOT NULL,
    confidence REAL
  )`,
  `CREATE INDEX IF NOT EXISTS idx_topics_name ON topics(name)`,
  `CREATE INDEX IF NOT EXISTS idx_topics_parent ON topics(parent_topic)`,

  // ── sync_watermarks ─────────────────────────────────────────────────────
  // Per-adapter (and optional scope) progress markers so re-syncs are incremental.
  // Examples of scope:
  //   email-imap: "<accountId>:INBOX"
  //   wechat:     "<talker-wxid>"
  //   alipay:     "" (single global)
  `CREATE TABLE IF NOT EXISTS sync_watermarks (
    adapter TEXT NOT NULL,
    scope TEXT NOT NULL DEFAULT '',
    watermark TEXT,
    last_synced_at INTEGER,
    last_status TEXT,
    last_error TEXT,
    PRIMARY KEY (adapter, scope)
  )`,

  // ── audit_log ───────────────────────────────────────────────────────────
  // Every read of personal data + every adapter sync + every key rotation
  // gets a row here. UI surfaces this for the "data lineage" view promised
  // in the architecture doc §11.1.
  `CREATE TABLE IF NOT EXISTS audit_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    at INTEGER NOT NULL,
    action TEXT NOT NULL,
    target TEXT,
    details TEXT
  )`,
  `CREATE INDEX IF NOT EXISTS idx_audit_at ON audit_log(at)`,
  `CREATE INDEX IF NOT EXISTS idx_audit_action ON audit_log(action)`,

  // ── raw_events ──────────────────────────────────────────────────────────
  // Verbatim adapter payload, primary key (adapter, originalId). Lets us
  // re-derive UnifiedSchema rows without re-syncing if normalization logic
  // changes (e.g. parser upgrade).
  `CREATE TABLE IF NOT EXISTS raw_events (
    adapter TEXT NOT NULL,
    original_id TEXT NOT NULL,
    captured_at INTEGER NOT NULL,
    payload TEXT NOT NULL,
    PRIMARY KEY (adapter, original_id)
  )`,
  `CREATE INDEX IF NOT EXISTS idx_raw_captured ON raw_events(captured_at)`,
];
|
|
181
|
+
|
|
182
|
+
/**
 * Ordered migration records. Each entry has a numeric `version`, a
 * human-readable `description`, and an `up(db)` step that receives the
 * open database handle.
 */
const MIGRATIONS = [
  {
    version: 1,
    description: "Initial UnifiedSchema tables + sync_watermarks + audit_log + raw_events",
    up(db) {
      // Execute the version-1 DDL one statement at a time, in list order.
      INITIAL_DDL.forEach((statement) => {
        db.exec(statement);
      });
    },
  },
];

/** Version of the last entry in MIGRATIONS. */
const TARGET_VERSION = MIGRATIONS.at(-1).version;
|
|
193
|
+
|
|
194
|
+
/**
 * Apply all pending migrations. Reads the current version from `_meta`,
 * runs each newer migration in its own transaction, and records the new
 * version in `_meta` inside that same transaction.
 * Idempotent — calling on an up-to-date vault is a no-op.
 *
 * A stored version that does not parse as an integer is treated as 0, so
 * every migration runs again. A vault whose stored version is already
 * higher than the newest known migration is left untouched and its actual
 * version is reported back.
 *
 * Throws if a migration fails (caller should treat as fatal — partial vault
 * state is recoverable from raw_events but is the user's call to make).
 *
 * @param {object} db handle exposing `exec`, `prepare` and `transaction`
 *   (presumably better-sqlite3 — confirm against the vault wiring)
 * @returns {{ previous: number, current: number }} schema version before and
 *   after the call
 */
function applyMigrations(db) {
  // Bootstrap _meta in its own statement — every subsequent migration assumes
  // it exists. Idempotent: CREATE TABLE IF NOT EXISTS.
  db.exec(`CREATE TABLE IF NOT EXISTS _meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL,
    updated_at INTEGER NOT NULL
  )`);

  const row = db.prepare("SELECT value FROM _meta WHERE key = 'schema_version'").get();
  const stored = row ? Number.parseInt(row.value, 10) : 0;
  // NaN would compare false against every version and leak into the return
  // value; fall back to 0, which selects the same set of migrations.
  const previous = Number.isNaN(stored) ? 0 : stored;
  let current = previous;

  for (const m of MIGRATIONS) {
    if (m.version <= current) continue;

    const runMigration = db.transaction(() => {
      m.up(db);
      const now = Date.now();
      db.prepare(
        `INSERT INTO _meta (key, value, updated_at) VALUES ('schema_version', ?, ?)
         ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at`
      ).run(String(m.version), now);
    });

    runMigration();
    current = m.version;
  }

  return { previous, current };
}
|
|
231
|
+
|
|
232
|
+
/**
 * Read the schema version recorded in `_meta` without modifying anything.
 *
 * Returns 0 when no `schema_version` row exists or when the lookup throws
 * for any reason; otherwise the stored value parsed as a base-10 integer.
 *
 * @param {object} db handle exposing `prepare(sql).get()`
 * @returns {number}
 */
function getSchemaVersion(db) {
  try {
    const stored = db.prepare("SELECT value FROM _meta WHERE key = 'schema_version'").get();
    if (!stored) return 0;
    return Number.parseInt(stored.value, 10);
  } catch {
    return 0;
  }
}
|
|
240
|
+
|
|
241
|
+
// Public surface: the migration list, the version of its last entry, and the
// apply/read helpers. INITIAL_DDL stays module-private.
module.exports = {
  MIGRATIONS,
  TARGET_VERSION,
  applyMigrations,
  getSchemaVersion,
};
|