@chainlesschain/personal-data-hub 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +241 -0
- package/__tests__/adapter-spec.test.js +78 -0
- package/__tests__/adapters/email-adapter.test.js +605 -0
- package/__tests__/adapters/email-imap-session.test.js +334 -0
- package/__tests__/adapters/email-parser.test.js +244 -0
- package/__tests__/adapters/email-providers.test.js +84 -0
- package/__tests__/analysis.test.js +302 -0
- package/__tests__/batch.test.js +133 -0
- package/__tests__/bridges-cc-kg.test.js +231 -0
- package/__tests__/bridges-cc-llm.test.js +191 -0
- package/__tests__/bridges-cc-rag.test.js +162 -0
- package/__tests__/ids.test.js +45 -0
- package/__tests__/key-providers.test.js +126 -0
- package/__tests__/kg-derive.test.js +219 -0
- package/__tests__/llm-client.test.js +122 -0
- package/__tests__/mock-adapter.test.js +93 -0
- package/__tests__/prompt-builder.test.js +204 -0
- package/__tests__/query-parser.test.js +150 -0
- package/__tests__/rag-derive.test.js +169 -0
- package/__tests__/registry.test.js +304 -0
- package/__tests__/schemas.test.js +331 -0
- package/__tests__/vault.test.js +506 -0
- package/lib/adapter-spec.js +155 -0
- package/lib/adapters/email-imap/email-adapter.js +398 -0
- package/lib/adapters/email-imap/email-parser.js +177 -0
- package/lib/adapters/email-imap/imap-session.js +294 -0
- package/lib/adapters/email-imap/index.js +26 -0
- package/lib/adapters/email-imap/providers.js +111 -0
- package/lib/analysis.js +226 -0
- package/lib/batch.js +123 -0
- package/lib/bridges/cc-kg-sink.js +264 -0
- package/lib/bridges/cc-llm-adapter.js +169 -0
- package/lib/bridges/cc-rag-sink.js +118 -0
- package/lib/bridges/index.js +44 -0
- package/lib/constants.js +92 -0
- package/lib/ids.js +103 -0
- package/lib/index.js +141 -0
- package/lib/key-providers.js +146 -0
- package/lib/kg-derive.js +214 -0
- package/lib/llm-client.js +171 -0
- package/lib/migrations.js +246 -0
- package/lib/mock-adapter.js +199 -0
- package/lib/prompt-builder.js +205 -0
- package/lib/query-parser.js +250 -0
- package/lib/rag-derive.js +186 -0
- package/lib/registry.js +398 -0
- package/lib/schemas.js +379 -0
- package/lib/vault.js +883 -0
- package/package.json +63 -0
- package/vitest.config.js +10 -0
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect, afterEach } from "vitest";
|
|
4
|
+
|
|
5
|
+
const fs = require("node:fs");
|
|
6
|
+
const os = require("node:os");
|
|
7
|
+
const path = require("node:path");
|
|
8
|
+
|
|
9
|
+
const { LocalVault } = require("../lib/vault");
|
|
10
|
+
const { generateKeyHex } = require("../lib/key-providers");
|
|
11
|
+
const { newId } = require("../lib/ids");
|
|
12
|
+
const { AnalysisEngine } = require("../lib/analysis");
|
|
13
|
+
const { MockLLMClient } = require("../lib/llm-client");
|
|
14
|
+
|
|
15
|
+
// ─── Scaffolding ─────────────────────────────────────────────────────────
|
|
16
|
+
|
|
17
|
+
// Shared per-test state. Both bindings are assigned by freshVault() and are
// read by the tests and by the afterEach teardown hook.
let tmpDir;
let vault;

/**
 * Creates a new temp directory (prefix "pdh-ana-") and opens a LocalVault
 * whose database file lives inside it, using a freshly generated key.
 * Results are stored in the module-level `tmpDir` and `vault` bindings.
 */
function freshVault() {
  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "pdh-ana-"));
  tmpDir = workDir;

  const vaultOptions = {
    path: path.join(workDir, "vault.db"),
    key: generateKeyHex(),
    skipAudit: true,
  };
  vault = new LocalVault(vaultOptions);
  vault.open();
}
|
|
29
|
+
|
|
30
|
+
// Teardown after every test: close the vault if one is open, then remove the
// temp directory if it still exists on disk.
afterEach(() => {
  if (vault) {
    try {
      vault.close();
    } catch (_e) {
      // Close errors are swallowed so teardown continues to directory cleanup.
    }
    vault = null;
  }

  const dirStillOnDisk = tmpDir && fs.existsSync(tmpDir);
  if (dirStillOnDisk) {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
});
|
|
34
|
+
|
|
35
|
+
/**
 * Epoch milliseconds for a local-time date at the start of the given hour.
 *
 * @param {number} year
 * @param {number} month0 - zero-based month (0 = January), as in `new Date(...)`
 * @param {number} day
 * @param {number} [hour=12]
 * @returns {number}
 */
const ts = (year, month0, day, hour = 12) => {
  const moment = new Date(year, month0, day, hour, 0, 0, 0);
  return moment.getTime();
};
|
|
37
|
+
|
|
38
|
+
/**
 * Builds the `source` provenance record attached to seeded events.
 * `originalId` is only included when a truthy value is supplied.
 *
 * @param {string} [adapter="taobao"]
 * @param {string} [originalId]
 * @returns {object}
 */
const source = (adapter = "taobao", originalId) => {
  const provenance = {
    adapter,
    adapterVersion: "0.1.0",
    capturedAt: ts(2026, 3, 15),
    capturedBy: "api",
  };
  if (originalId) {
    provenance.originalId = originalId;
  }
  return provenance;
};
|
|
45
|
+
|
|
46
|
+
/**
 * Writes four taobao order events into `vault` via putBatch and returns them
 * in insertion order.
 *
 * @param {object} vault - object exposing `putBatch(batch)`
 * @returns {object[]} the four seeded events
 */
function seedOrders(vault) {
  // 3 April-2026 orders to mom, 1 in May-2026 to self.

  // Builds one order event. Properties are added in the same order for every
  // event; `participants` is only present when supplied.
  const makeOrder = ({ occurredAt, title, value, originalId, participants }) => {
    const order = {
      id: newId(),
      type: "event",
      subtype: "order",
      occurredAt,
      actor: "person-self",
    };
    if (participants) {
      order.participants = participants;
    }
    order.content = {
      title,
      amount: { value, currency: "CNY", direction: "out" },
    };
    order.ingestedAt = Date.now();
    order.source = source("taobao", originalId);
    return order;
  };

  const seeded = [
    makeOrder({
      occurredAt: ts(2026, 3, 9),
      title: "蛋白粉 给妈妈",
      value: 288.5,
      originalId: "ord-1",
      participants: ["person-self", "person-mom"],
    }),
    makeOrder({
      occurredAt: ts(2026, 3, 12),
      title: "按摩仪 给妈妈",
      value: 459,
      originalId: "ord-2",
    }),
    makeOrder({
      occurredAt: ts(2026, 3, 12, 10),
      title: "鲜花 给妈妈生日",
      value: 199,
      originalId: "ord-3",
    }),
    makeOrder({
      occurredAt: ts(2026, 4, 5), // May (out of window for "上个月" if now=mid-May)
      title: "电子产品",
      value: 1599,
      originalId: "ord-4",
    }),
  ];

  // The vault receives its own array so the returned list is a separate object.
  vault.putBatch({ events: [...seeded], persons: [], places: [], items: [], topics: [] });
  return seeded;
}
|
|
104
|
+
|
|
105
|
+
// Fixed "current time" passed to engine.ask(): 2026-05-19 12:00 UTC, in epoch ms.
const NOW = Date.parse("2026-05-19T12:00:00Z");
|
|
106
|
+
|
|
107
|
+
// ─── Construction ────────────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
describe("AnalysisEngine construction", () => {
  it("requires vault + llm + llm.isLocal", () => {
    // Each row: constructor options that are one dependency short, paired
    // with the pattern the thrown error must match.
    const incompleteOptions = [
      [{}, /vault/],
      [{ vault: {} }, /llm/],
      [{ vault: {}, llm: {} }, /chat/],
      [{ vault: {}, llm: { chat: () => {} } }, /isLocal/],
    ];

    for (const [options, expectedMessage] of incompleteOptions) {
      expect(() => new AnalysisEngine(options)).toThrow(expectedMessage);
    }
  });

  it("constructs cleanly with mock LLM", () => {
    freshVault();
    const mockLlm = new MockLLMClient({ reply: "" });
    const engine = new AnalysisEngine({ vault, llm: mockLlm });

    expect(engine.maxFacts).toBe(80);
  });
});
|
|
127
|
+
|
|
128
|
+
// ─── Privacy gate ────────────────────────────────────────────────────────
|
|
129
|
+
|
|
130
|
+
describe("AnalysisEngine privacy gate", () => {
  it("refuses non-local LLM without acceptNonLocal opt-in", async () => {
    freshVault();

    const cloudLlm = new MockLLMClient({ reply: "" });
    cloudLlm.isLocal = false; // simulate cloud
    const engine = new AnalysisEngine({ vault, llm: cloudLlm });

    // Without the opt-in the ask must reject.
    await expect(engine.ask("hello")).rejects.toThrow(/non-local/);

    // Explicit opt-in unlocks
    const optIn = { acceptNonLocal: true };
    await expect(engine.ask("hello", optIn)).resolves.toBeDefined();
  });
});
|
|
141
|
+
|
|
142
|
+
// ─── E2E: 5 typical questions from architecture-doc §8.1 / §15.1 ────────
|
|
143
|
+
|
|
144
|
+
describe("AnalysisEngine E2E (mock LLM, real vault)", () => {
  // Question shared by several cases below.
  const LAST_MONTH_ON_TAOBAO = "上个月在淘宝总共花了多少?";

  // Builds an engine over the vault opened by freshVault(), backed by a mock
  // LLM that always answers with `reply`.
  const engineReplying = (reply) =>
    new AnalysisEngine({ vault, llm: new MockLLMClient({ reply }) });

  it("Q1 sum: '上个月在淘宝总共花了多少?' — facts gathered + cited", async () => {
    freshVault();
    const aprilIds = seedOrders(vault)
      .slice(0, 3)
      .map((order) => order.id);

    // Mock LLM that cites e1+e2+e3 with the total. We don't compute the sum
    // here — the LLM would do that in production. We assert the engine
    // hands the right facts and correctly validates the citations.
    const engine = engineReplying(
      `上个月你在淘宝下了 3 单:[${aprilIds[0]}] [${aprilIds[1]}] [${aprilIds[2]}],共 ¥946.50。`,
    );
    const result = await engine.ask(LAST_MONTH_ON_TAOBAO, { now: NOW });

    expect(result.warning).toBeNull();
    expect(result.citations.length).toBe(3);
    for (const id of aprilIds) {
      expect(result.citations).toContain(id);
    }
    expect(result.hallucinatedCitations).toEqual([]);

    // facts: exactly the 3 April orders (May order excluded by time window)
    expect(result.facts.length).toBe(3);
    const onlyOrders = result.facts.every((fact) => fact.subtype === "order");
    expect(onlyOrders).toBe(true);

    expect(result.parsed.filters.adapter).toBe("taobao");
    expect(result.parsed.intent).toBe("sum-amount");
  });

  // Note: the title quotes one phrasing while the question actually asked
  // below names April 2026 explicitly.
  it("Q2 list: '我妈生日那周买了啥' — wider window, mocked LLM cites facts", async () => {
    freshVault();
    const [protein, massager, flowers] = seedOrders(vault);
    const engine = engineReplying(
      `你给妈妈准备了:蛋白粉 [${protein.id}]、按摩仪 [${massager.id}]、鲜花 [${flowers.id}]。`,
    );

    const result = await engine.ask("2026 年 4 月买了什么给妈妈?", { now: NOW });

    expect(result.facts.length).toBe(3); // April orders
    expect(result.citations.length).toBe(3);
    expect(result.parsed.timeWindow).not.toBeNull();
  });

  it("Q3 no-facts: empty vault yields warning='no-facts'", async () => {
    freshVault();
    const engine = engineReplying("你的本月开销记录是空的。");

    const result = await engine.ask("本月总共花了多少?", { now: NOW });

    expect(result.warning).toBe("no-facts");
    expect(result.facts).toEqual([]);
    expect(result.citations).toEqual([]);
    // The mocked answer should still come through unchanged.
    expect(result.answer).toContain("空的");
  });

  it("Q4 hallucination detection: LLM cites unknown ids → warning='hallucinated-citations'", async () => {
    freshVault();
    seedOrders(vault);
    const engine = engineReplying("总计 ¥1234 [evt-fake-id-1] [evt-also-fake-2]。");

    const result = await engine.ask(LAST_MONTH_ON_TAOBAO, { now: NOW });

    expect(result.warning).toBe("hallucinated-citations");
    for (const fakeId of ["evt-fake-id-1", "evt-also-fake-2"]) {
      expect(result.hallucinatedCitations).toContain(fakeId);
    }
    expect(result.citations).toEqual([]); // no known ids cited
  });

  it("Q5 LLM error propagates: vault stays intact, audit recorded", async () => {
    freshVault();
    seedOrders(vault);

    // A local LLM whose chat() always fails.
    const brokenLlm = new MockLLMClient({});
    brokenLlm.chat = async () => { throw new Error("Ollama down"); };
    brokenLlm.isLocal = true;
    const engine = new AnalysisEngine({ vault, llm: brokenLlm });

    await expect(engine.ask("test", { now: NOW })).rejects.toThrow(/Ollama down/);

    const failureRows = vault.queryAudit({ action: "analysis.llm_failed" });
    expect(failureRows.length).toBe(1);
  });

  it("audits every successful ask with fact + citation counts", async () => {
    freshVault();
    const [firstOrder] = seedOrders(vault);
    const engine = engineReplying(`cited [${firstOrder.id}]`);
    await engine.ask(LAST_MONTH_ON_TAOBAO, { now: NOW });

    const askRows = vault.queryAudit({ action: "analysis.ask" });
    expect(askRows.length).toBe(1);

    // `details` is stored as a JSON string on the audit row.
    const { factCount, citationsKnown, citationsUnknown, warning } = JSON.parse(askRows[0].details);
    expect(factCount).toBe(3);
    expect(citationsKnown).toBe(1);
    expect(citationsUnknown).toBe(0);
    expect(warning).toBeNull();
  });

  it("skipAudit option suppresses audit row", async () => {
    freshVault();
    seedOrders(vault);
    const engine = engineReplying("ok");

    await engine.ask("test", { now: NOW, skipAudit: true });

    const askRows = vault.queryAudit({ action: "analysis.ask" });
    expect(askRows.length).toBe(0);
  });
});
|
|
254
|
+
|
|
255
|
+
// ─── RAG augmentation ────────────────────────────────────────────────────
|
|
256
|
+
|
|
257
|
+
describe("AnalysisEngine RAG retriever", () => {
  it("adds RAG-retrieved events to facts (by id lookup in vault)", async () => {
    freshVault();
    const mayOrder = seedOrders(vault)[3];

    // RAG returns the May order (which falls OUTSIDE the "上个月" time window)
    // — engine should still include it because RAG marks it semantically
    // relevant.
    const ragRetriever = async () => {
      const hit = { id: mayOrder.id, text: "fake", metadata: {} };
      return [hit];
    };

    const engine = new AnalysisEngine({
      vault,
      llm: new MockLLMClient({ reply: "ok" }),
      ragRetriever,
    });
    const result = await engine.ask("上个月在淘宝总共花了多少?", { now: NOW });

    // Original 3 April orders + 1 May order pulled by RAG.
    expect(result.facts.length).toBe(4);
    expect(result.ragContextIds).toEqual([mayOrder.id]);
  });

  it("RAG failure is captured but doesn't abort the ask", async () => {
    freshVault();
    seedOrders(vault);

    // Retriever that always rejects.
    const ragRetriever = async () => { throw new Error("qdrant unreachable"); };
    const engine = new AnalysisEngine({
      vault,
      llm: new MockLLMClient({ reply: "ok" }),
      ragRetriever,
    });

    const result = await engine.ask("test", { now: NOW });
    expect(result.answer).toBe("ok");

    const ragFailures = vault.queryAudit({ action: "analysis.rag_failed" });
    expect(ragFailures.length).toBe(1);
  });
});
|
|
289
|
+
|
|
290
|
+
// ─── Empty / bad input ────────────────────────────────────────────────────
|
|
291
|
+
|
|
292
|
+
describe("AnalysisEngine input validation", () => {
  it("rejects empty / non-string question", async () => {
    freshVault();
    const silentLlm = new MockLLMClient({ reply: "" });
    const engine = new AnalysisEngine({ vault, llm: silentLlm });

    // Empty string: the error message must mention "non-empty".
    await expect(engine.ask("")).rejects.toThrow(/non-empty/);
    // null: any rejection is accepted.
    await expect(engine.ask(null)).rejects.toThrow();
  });
});
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
|
|
5
|
+
const { emptyBatch, mergeBatches, validateBatch, partitionBatch } = require("../lib/batch");
|
|
6
|
+
const { newId } = require("../lib/ids");
|
|
7
|
+
|
|
8
|
+
/**
 * Returns a new `source` provenance record for fixtures, stamped with the
 * current time.
 */
const source = () => {
  const capturedAt = Date.now();
  return {
    adapter: "test",
    adapterVersion: "0.1.0",
    capturedAt,
    capturedBy: "api",
  };
};
|
|
14
|
+
|
|
15
|
+
/**
 * Returns a new order event fixture with id, timestamps, content and source
 * all populated. The tests below use it as the schema-valid event.
 */
const goodEvent = () => {
  const event = {
    id: newId(),
    type: "event",
    subtype: "order",
    occurredAt: Date.now(),
    ingestedAt: Date.now(),
    content: { title: "X" },
    source: source(),
  };
  return event;
};
|
|
24
|
+
|
|
25
|
+
/**
 * Returns a new merchant person fixture with id, names, ingestedAt and
 * source populated. The tests below use it as the schema-valid person.
 */
const goodPerson = () => {
  const person = {
    id: newId(),
    type: "person",
    subtype: "merchant",
    names: ["美团"],
    ingestedAt: Date.now(),
    source: source(),
  };
  return person;
};
|
|
33
|
+
|
|
34
|
+
/**
 * Returns a new event fixture that deliberately omits fields, used by the
 * tests below as the invalid event.
 */
const badEvent = () => {
  // missing required occurredAt + source + content
  const incomplete = {
    id: newId(),
    type: "event",
    subtype: "order",
    ingestedAt: Date.now(),
  };
  return incomplete;
};
|
|
41
|
+
|
|
42
|
+
describe("emptyBatch", () => {
  it("returns shape with all 5 entity arrays", () => {
    const expectedShape = { events: [], persons: [], places: [], items: [], topics: [] };

    expect(emptyBatch()).toEqual(expectedShape);
  });
});
|
|
48
|
+
|
|
49
|
+
describe("mergeBatches", () => {
  it("concatenates all 5 arrays", () => {
    const left = { ...emptyBatch(), events: [goodEvent()], persons: [goodPerson()] };
    const right = { ...emptyBatch(), events: [goodEvent()] };

    const { events, persons, places } = mergeBatches(left, right);

    expect(events.length).toBe(2);
    expect(persons.length).toBe(1);
    expect(places.length).toBe(0);
  });

  it("tolerates missing arrays in either input", () => {
    // First input has no arrays at all; second has only `persons`.
    const onlyPersons = { persons: [goodPerson()] };

    const merged = mergeBatches({}, onlyPersons);

    expect(merged.persons.length).toBe(1);
    expect(merged.events.length).toBe(0);
  });
});
|
|
65
|
+
|
|
66
|
+
describe("validateBatch", () => {
  it("returns valid=true for an all-good batch", () => {
    const batch = { events: [goodEvent(), goodEvent()], persons: [goodPerson()] };

    const report = validateBatch(batch);

    expect(report.valid).toBe(true);
    expect(report.entityCount).toBe(3);
    expect(report.errorCount).toBe(0);
  });

  it("collects all errors instead of throwing", () => {
    // One good event followed by two bad ones.
    const batch = { events: [goodEvent(), badEvent(), badEvent()] };

    const report = validateBatch(batch);

    expect(report.valid).toBe(false);
    expect(report.entityCount).toBe(3);
    expect(report.errorCount).toBe(2);

    const allFromEvents = report.errors.every((entry) => entry.kind === "events");
    expect(allFromEvents).toBe(true);

    const [firstError] = report.errors;
    expect(firstError).toHaveProperty("index");
    expect(firstError.errors.length).toBeGreaterThan(0);
  });

  it("rejects non-array entity buckets", () => {
    const report = validateBatch({ events: "not-an-array" });

    expect(report.valid).toBe(false);
    expect(report.errors[0].errors[0]).toMatch(/array/);
  });

  it("rejects non-object batch", () => {
    for (const notABatch of [null, "string"]) {
      expect(validateBatch(notABatch).valid).toBe(false);
    }
  });

  it("accepts batch with only some entity types populated", () => {
    const report = validateBatch({ persons: [goodPerson()] });

    expect(report.valid).toBe(true);
    expect(report.entityCount).toBe(1);
  });
});
|
|
101
|
+
|
|
102
|
+
describe("partitionBatch", () => {
  it("separates valid from invalid into distinct sub-batches", () => {
    const validEvent = goodEvent();
    const brokenEvent = badEvent();
    const validPerson = goodPerson();

    const outcome = partitionBatch({
      events: [validEvent, brokenEvent],
      persons: [validPerson],
    });

    expect(outcome.valid.events).toEqual([validEvent]);
    expect(outcome.valid.persons).toEqual([validPerson]);
    expect(outcome.invalid.events).toEqual([brokenEvent]);
    expect(outcome.invalid.persons).toEqual([]);

    // Exactly one reason, pointing at the broken event.
    expect(outcome.invalidReasons.length).toBe(1);
    const [reason] = outcome.invalidReasons;
    expect(reason.kind).toBe("events");
    expect(reason.id).toBe(brokenEvent.id);
  });

  it("returns empty invalid sub-batch when all rows are valid", () => {
    const outcome = partitionBatch({
      events: [goodEvent()],
      persons: [goodPerson()],
    });

    expect(outcome.invalid.events).toEqual([]);
    expect(outcome.invalid.persons).toEqual([]);
    expect(outcome.invalidReasons).toEqual([]);
    expect(outcome.valid.events.length).toBe(1);
    expect(outcome.valid.persons.length).toBe(1);
  });
});
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
|
|
5
|
+
const { CcKgSink, HUB_TO_CC_TYPE } = require("../lib/bridges/cc-kg-sink");
|
|
6
|
+
|
|
7
|
+
// ─── Fake cc KG (mirrors the real addEntity/addRelation signature) ──────
|
|
8
|
+
|
|
9
|
+
/**
 * Builds an in-memory stand-in for the cc knowledge graph.
 *
 * The returned object exposes `addEntity(_db, cfg)` and
 * `addRelation(_db, cfg)` plus the backing `entities` Map (keyed by entity
 * id) and `relations` array so tests can inspect what was written. The first
 * argument of both functions is ignored.
 *
 * @returns {{addEntity: Function, addRelation: Function, entities: Map, relations: object[]}}
 */
function makeFakeKg() {
  const entities = new Map();
  const relations = [];
  return {
    /**
     * Stores a shallow copy of `cfg` under `cfg.id` and returns `cfg`.
     * @throws {Error} when id/name/type is missing or the id is already stored
     */
    addEntity(_db, cfg) {
      if (!cfg || !cfg.id || !cfg.name || !cfg.type) {
        throw new Error("missing required fields");
      }
      if (entities.has(cfg.id)) {
        throw new Error(`Entity already exists: ${cfg.id}`);
      }
      entities.set(cfg.id, { ...cfg });
      return cfg;
    },
    /**
     * Appends a shallow copy of `cfg` to `relations` and returns `cfg`.
     * @throws {Error} when cfg or an endpoint id is missing, or an endpoint
     *   has not been added as an entity
     */
    addRelation(_db, cfg) {
      // Same guard as addEntity: fail with a readable Error rather than a
      // TypeError from dereferencing a missing config.
      if (!cfg || !cfg.sourceId || !cfg.targetId) {
        throw new Error("missing required fields");
      }
      if (!entities.has(cfg.sourceId)) throw new Error(`source not found: ${cfg.sourceId}`);
      if (!entities.has(cfg.targetId)) throw new Error(`target not found: ${cfg.targetId}`);
      relations.push({ ...cfg });
      return cfg;
    },
    entities,
    relations,
  };
}
|
|
33
|
+
|
|
34
|
+
/**
 * Shorthand for building a triple.
 *
 * A truthy `opts.object` wins and is stored as `object`; otherwise a defined
 * `opts.literal` is stored as `literal`. With neither, only subject and
 * predicate are set.
 *
 * @param {string} subject
 * @param {string} predicate
 * @param {{object?: string, literal?: *}} [opts]
 * @returns {object}
 */
const t = (subject, predicate, opts) => {
  const triple = { subject, predicate };
  if (!opts) {
    return triple;
  }
  if (opts.object) {
    triple.object = opts.object;
  } else if (opts.literal !== undefined) {
    triple.literal = opts.literal;
  }
  return triple;
};
|
|
40
|
+
|
|
41
|
+
// ─── Tests ──────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
describe("CcKgSink construction", () => {
  it("requires addEntity + addRelation", () => {
    const noop = () => {};

    // No options at all: any error is accepted.
    expect(() => new CcKgSink()).toThrow();
    // One callback missing: the error must name the missing one.
    expect(() => new CcKgSink({ addRelation: noop })).toThrow(/addEntity/);
    expect(() => new CcKgSink({ addEntity: noop })).toThrow(/addRelation/);
  });
});
|
|
50
|
+
|
|
51
|
+
describe("CcKgSink.write entity creation", () => {
  // Fresh fake KG plus a sink wired to its addEntity/addRelation.
  const setup = () => {
    const kg = makeFakeKg();
    const sink = new CcKgSink({ addEntity: kg.addEntity, addRelation: kg.addRelation });
    return { kg, sink };
  };

  it("creates entities from rdf:type + has-name triples", async () => {
    const { kg, sink } = setup();
    const triples = [
      t("p1", "rdf:type", { literal: "person" }),
      t("p1", "subtype", { literal: "contact" }),
      t("p1", "has-name", { literal: "妈妈" }),
      t("p1", "has-name", { literal: "陈某某" }),
      t("p1", "id:phone", { literal: "13800001111" }),
    ];

    const outcome = await sink.write(triples);

    expect(outcome.entitiesUpserted).toBe(1);
    const stored = kg.entities.get("p1");
    expect(stored.type).toBe("Person");
    expect(stored.name).toBe("妈妈"); // first name wins
    const props = stored.properties;
    expect(props.subtype).toBe("contact");
    expect(props.hubKind).toBe("person");
    expect(props.aliases).toEqual(["陈某某"]);
    expect(props["id:phone"]).toBe("13800001111");
  });

  it("maps hub place → cc Concept with hubKind", async () => {
    const { kg, sink } = setup();

    await sink.write([
      t("pl1", "rdf:type", { literal: "place" }),
      t("pl1", "has-name", { literal: "妈妈家" }),
      t("pl1", "located-at", { literal: "24.5,118.1" }),
    ]);

    const place = kg.entities.get("pl1");
    expect(place.type).toBe("Concept");
    expect(place.properties.hubKind).toBe("place");
    expect(place.properties["located-at"]).toBe("24.5,118.1");
  });

  it("maps hub item / topic → cc Concept", async () => {
    const { kg, sink } = setup();

    await sink.write([
      t("i1", "rdf:type", { literal: "item" }),
      t("i1", "has-name", { literal: "蛋白粉" }),
      t("t1", "rdf:type", { literal: "topic" }),
      t("t1", "has-name", { literal: "母亲健康" }),
    ]);

    const item = kg.entities.get("i1");
    expect(item.type).toBe("Concept");
    expect(item.properties.hubKind).toBe("item");
    const topic = kg.entities.get("t1");
    expect(topic.type).toBe("Concept");
    expect(topic.properties.hubKind).toBe("topic");
  });

  it("falls back to subject id when no has-name (uses Event subject as name)", async () => {
    const { kg, sink } = setup();

    await sink.write([
      t("evt-x", "rdf:type", { literal: "event" }),
      t("evt-x", "subtype", { literal: "order" }),
    ]);

    const event = kg.entities.get("evt-x");
    expect(event.name).toBe("evt-x");
    expect(event.type).toBe("Event");
  });

  it("treats 'already exists' as upsert success (no error)", async () => {
    const { sink } = setup();
    const aliceTriples = () => [
      t("p1", "rdf:type", { literal: "person" }),
      t("p1", "has-name", { literal: "alice" }),
    ];

    await sink.write(aliceTriples());
    // Second write of same subject — cc throws "already exists"
    const second = await sink.write(aliceTriples());

    expect(second.entitiesUpserted).toBe(0);
    expect(second.errors.length).toBe(0); // already-exists treated as upsert hit
  });

  it("captures unknown predicates under __extra", async () => {
    const { kg, sink } = setup();

    await sink.write([
      t("evt-x", "rdf:type", { literal: "event" }),
      t("evt-x", "weird-predicate", { literal: "foo" }),
    ]);

    const extra = kg.entities.get("evt-x").properties.__extra;
    expect(extra).toEqual({ "weird-predicate": "foo" });
  });

  it("collects upstream errors (e.g. missing required field)", async () => {
    const kg = makeFakeKg();
    // addEntity always fails, simulating an upstream fault.
    const failingAddEntity = () => { throw new Error("upstream DB exploded"); };
    const sink = new CcKgSink({
      addEntity: failingAddEntity,
      addRelation: kg.addRelation,
    });

    const outcome = await sink.write([t("p1", "rdf:type", { literal: "person" })]);

    expect(outcome.entitiesUpserted).toBe(0);
    expect(outcome.errors.length).toBe(1);
    expect(outcome.errors[0].error).toContain("upstream DB exploded");
  });
});
|
|
150
|
+
|
|
151
|
+
describe("CcKgSink.write relation creation", () => {
  // Fresh fake KG plus a sink wired to its addEntity/addRelation.
  const setup = () => {
    const kg = makeFakeKg();
    const sink = new CcKgSink({ addEntity: kg.addEntity, addRelation: kg.addRelation });
    return { kg, sink };
  };

  it("adds relations between created entities", async () => {
    const { kg, sink } = setup();

    const outcome = await sink.write([
      t("evt-1", "rdf:type", { literal: "event" }),
      t("evt-1", "subtype", { literal: "payment" }),
      t("p1", "rdf:type", { literal: "person" }),
      t("p1", "has-name", { literal: "mom" }),
      t("evt-1", "by", { object: "p1" }),
      t("evt-1", "involves", { object: "p1" }),
    ]);

    expect(outcome.entitiesUpserted).toBe(2);
    expect(outcome.relationsAdded).toBe(2);
    for (const relationType of ["by", "involves"]) {
      expect(kg.relations).toContainEqual({ sourceId: "evt-1", targetId: "p1", relationType });
    }
  });

  it("skips relation when endpoint not in KG (no dangling refs)", async () => {
    const { sink } = setup();

    const outcome = await sink.write([
      t("evt-1", "rdf:type", { literal: "event" }),
      // p-missing was never declared with rdf:type
      t("evt-1", "by", { object: "p-missing" }),
    ]);

    expect(outcome.relationsAdded).toBe(0);
    expect(outcome.errors.length).toBe(1);
    expect(outcome.errors[0].error).toContain("endpoint not in KG");
  });

  it("rejects unknown predicates (defensive — don't poison cc KG)", async () => {
    const { sink } = setup();
    // Declare both endpoints first, in separate writes.
    await sink.write([t("e", "rdf:type", { literal: "event" })]);
    await sink.write([t("p", "rdf:type", { literal: "person" })]);

    const outcome = await sink.write([t("e", "frobnicate", { object: "p" })]);

    expect(outcome.relationsAdded).toBe(0);
    expect(outcome.errors[0].error).toBe("unknown predicate");
  });

  it("tolerates duplicate-relation errors as success", async () => {
    const kg = makeFakeKg();
    const alwaysDuplicate = () => { throw new Error("Relation already exists"); };
    const sink = new CcKgSink({
      addEntity: kg.addEntity,
      addRelation: alwaysDuplicate,
    });
    await sink.write([
      t("e", "rdf:type", { literal: "event" }),
      t("p", "rdf:type", { literal: "person" }),
    ]);

    const outcome = await sink.write([t("e", "by", { object: "p" })]);

    // Even though addRelation throws "already exists", sink treats as success.
    expect(outcome.errors.length).toBe(0);
  });
});
|
|
207
|
+
|
|
208
|
+
describe("CcKgSink edge cases", () => {
  // Fresh fake KG plus a sink wired to its addEntity/addRelation.
  const setup = () => {
    const kg = makeFakeKg();
    const sink = new CcKgSink({ addEntity: kg.addEntity, addRelation: kg.addRelation });
    return { kg, sink };
  };

  it("returns zeros for empty input", async () => {
    const { sink } = setup();

    const outcome = await sink.write([]);

    expect(outcome).toEqual({ entitiesUpserted: 0, relationsAdded: 0, errors: [] });
  });

  it("ignores malformed triples (missing subject/predicate)", async () => {
    const { kg, sink } = setup();
    // A triple without a subject, plus null and undefined entries.
    const malformed = [{ predicate: "rdf:type", literal: "person" }, null, undefined];

    const outcome = await sink.write(malformed);

    expect(outcome.entitiesUpserted).toBe(0);
    expect(kg.entities.size).toBe(0);
  });

  it("HUB_TO_CC_TYPE exposes the documented mapping", () => {
    const expectedMapping = [
      ["person", "Person"],
      ["event", "Event"],
      ["place", "Concept"],
      ["item", "Concept"],
      ["topic", "Concept"],
    ];

    for (const [hubKind, ccType] of expectedMapping) {
      expect(HUB_TO_CC_TYPE[hubKind]).toBe(ccType);
    }
  });
});
|