@chainlesschain/personal-data-hub 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +241 -0
- package/__tests__/adapter-spec.test.js +78 -0
- package/__tests__/adapters/email-adapter.test.js +605 -0
- package/__tests__/adapters/email-imap-session.test.js +334 -0
- package/__tests__/adapters/email-parser.test.js +244 -0
- package/__tests__/adapters/email-providers.test.js +84 -0
- package/__tests__/analysis.test.js +302 -0
- package/__tests__/batch.test.js +133 -0
- package/__tests__/bridges-cc-kg.test.js +231 -0
- package/__tests__/bridges-cc-llm.test.js +191 -0
- package/__tests__/bridges-cc-rag.test.js +162 -0
- package/__tests__/ids.test.js +45 -0
- package/__tests__/key-providers.test.js +126 -0
- package/__tests__/kg-derive.test.js +219 -0
- package/__tests__/llm-client.test.js +122 -0
- package/__tests__/mock-adapter.test.js +93 -0
- package/__tests__/prompt-builder.test.js +204 -0
- package/__tests__/query-parser.test.js +150 -0
- package/__tests__/rag-derive.test.js +169 -0
- package/__tests__/registry.test.js +304 -0
- package/__tests__/schemas.test.js +331 -0
- package/__tests__/vault.test.js +506 -0
- package/lib/adapter-spec.js +155 -0
- package/lib/adapters/email-imap/email-adapter.js +398 -0
- package/lib/adapters/email-imap/email-parser.js +177 -0
- package/lib/adapters/email-imap/imap-session.js +294 -0
- package/lib/adapters/email-imap/index.js +26 -0
- package/lib/adapters/email-imap/providers.js +111 -0
- package/lib/analysis.js +226 -0
- package/lib/batch.js +123 -0
- package/lib/bridges/cc-kg-sink.js +264 -0
- package/lib/bridges/cc-llm-adapter.js +169 -0
- package/lib/bridges/cc-rag-sink.js +118 -0
- package/lib/bridges/index.js +44 -0
- package/lib/constants.js +92 -0
- package/lib/ids.js +103 -0
- package/lib/index.js +141 -0
- package/lib/key-providers.js +146 -0
- package/lib/kg-derive.js +214 -0
- package/lib/llm-client.js +171 -0
- package/lib/migrations.js +246 -0
- package/lib/mock-adapter.js +199 -0
- package/lib/prompt-builder.js +205 -0
- package/lib/query-parser.js +250 -0
- package/lib/rag-derive.js +186 -0
- package/lib/registry.js +398 -0
- package/lib/schemas.js +379 -0
- package/lib/vault.js +883 -0
- package/package.json +63 -0
- package/vitest.config.js +10 -0
package/README.md
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
# @chainlesschain/personal-data-hub
|
|
2
|
+
|
|
3
|
+
Personal Data Hub — UnifiedSchema, validators, batch helpers, SQLCipher
|
|
4
|
+
LocalVault, and AdapterRegistry for the "data back to the individual"
|
|
5
|
+
middleware.
|
|
6
|
+
|
|
7
|
+
> **Phase 0 + Phase 1 + Phase 2 + Phase 3 + Phase 3.5 landed** of the 13-phase plan in
|
|
8
|
+
> [`docs/design/Personal_Data_Hub_Architecture.md`](../../docs/design/Personal_Data_Hub_Architecture.md).
|
|
9
|
+
> Phase 0 covers schema + validation + ID generation.
|
|
10
|
+
> Phase 1 adds SQLCipher LocalVault + pluggable key providers + migrations.
|
|
11
|
+
> Phase 2 adds AdapterRegistry + KG/RAG derivation + MockAdapter (1000
|
|
12
|
+
> events ingest in ~600ms — 50× under the 30s target).
|
|
13
|
+
> Phase 3 adds the natural-language AnalysisEngine: query parser → vault
|
|
14
|
+
> facts → prompt builder → LLM → citation validation, with a privacy gate
|
|
15
|
+
> that refuses non-local LLMs unless caller opts in. **MockLLMClient**
|
|
16
|
+
> for tests, **OllamaClient** for standalone use.
|
|
17
|
+
> Phase 3.5 wires production bridges: **CcLLMAdapter** wraps the existing
|
|
18
|
+
> cc llm-manager (Ollama / Volcengine / Anthropic / Gemini / DeepSeek)
|
|
19
|
+
> via dependency injection; **CcKgSink** translates hub triples into the
|
|
20
|
+
> existing knowledge-graph addEntity + addRelation; **CcRagSink** feeds
|
|
21
|
+
> hub RagDocs into BM25 (Qdrant vector store wiring left as future work).
|
|
22
|
+
> Hub package stays decoupled — bridges take cc functions as constructor
|
|
23
|
+
> args rather than importing cc modules directly.
|
|
24
|
+
> Sync engine UI, real KG/RAG wiring, and the actual adapters (Email,
|
|
25
|
+
> Alipay, AI Chat × 8, WeChat, ...) come in later phases.
|
|
26
|
+
|
|
27
|
+
## What's in here
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
lib/
|
|
31
|
+
├── constants.js enum values (entity types, subtypes, capturedBy, ...)
|
|
32
|
+
├── ids.js UUID v7 (hand-rolled RFC 9562, ~30 LOC, no dep)
|
|
33
|
+
├── schemas.js per-entity validators (Person/Event/Place/Item/Topic)
|
|
34
|
+
├── batch.js NormalizedBatch helpers (empty/merge/validate/partition)
|
|
35
|
+
├── migrations.js LocalVault schema (events/persons/places/items/topics
|
|
36
|
+
│ /sync_watermarks/audit_log/raw_events) + versioning
|
|
37
|
+
├── key-providers.js InMemoryKeyProvider + FileKeyProvider + KeyProvider
|
|
38
|
+
│ contract for platform Keystore impls in later phases
|
|
39
|
+
├── vault.js LocalVault — SQLCipher AES-256, transactional putBatch,
|
|
40
|
+
│ typed put/get, queryEvents, watermarks, audit, key
|
|
41
|
+
│ rotation (WAL-safe), destroy
|
|
42
|
+
├── adapter-spec.js PersonalDataAdapter contract + assertAdapter check
|
|
43
|
+
├── kg-derive.js UnifiedSchema → KG triples (rdf:type / by / involves /
|
|
44
|
+
│ happened-at / etc.) — engine-agnostic
|
|
45
|
+
├── rag-derive.js UnifiedSchema → RAG (text, metadata) docs for indexing
|
|
46
|
+
│ into BM25 + vector retrievers
|
|
47
|
+
├── registry.js AdapterRegistry — register/list, syncAdapter with full
|
|
48
|
+
│ pipeline (health → sync → archive raw → normalize →
|
|
49
|
+
│ partition valid/invalid → vault → KG sink → RAG sink
|
|
50
|
+
│ → watermark → audit), syncAll, pluggable kgSink/ragSink
|
|
51
|
+
├── mock-adapter.js reference impl + test fixture (deterministic seeded)
|
|
52
|
+
├── query-parser.js heuristic time-window + filter + intent extraction
|
|
53
|
+
│ from natural-language questions
|
|
54
|
+
├── prompt-builder.js fact summarization + system/user prompt construction
|
|
55
|
+
│ (system prompt is fact-free; facts go in user role as
|
|
56
|
+
│ marked-untrusted JSON) + citation parser + validator
|
|
57
|
+
├── llm-client.js MockLLMClient (tests) + OllamaClient (default standalone)
|
|
58
|
+
│ conforming to the chat({messages}) → {text, usage}
|
|
59
|
+
│ contract. Production plugs in CcLLMAdapter wrapping
|
|
60
|
+
│ the existing desktop-app-vue llm-manager.
|
|
61
|
+
├── analysis.js AnalysisEngine — orchestrates parseQuery → vault facts
|
|
62
|
+
│ (optional RAG augmentation) → buildPrompt → llm.chat →
|
|
63
|
+
│ parseCitations → validateCitations → audit. Hard
|
|
64
|
+
│ privacy gate refuses non-local LLMs without opt-in.
|
|
65
|
+
├── bridges/
|
|
66
|
+
│ ├── cc-llm-adapter.js wraps cc llm-manager.chat → LLMClient
|
|
67
|
+
│ ├── cc-kg-sink.js hub triples → cc addEntity + addRelation
|
|
68
|
+
│ ├── cc-rag-sink.js hub RagDocs → cc BM25 (+ optional vector)
|
|
69
|
+
│ └── index.js re-exports
|
|
70
|
+
└── index.js re-exports
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## The 5 core entities
|
|
74
|
+
|
|
75
|
+
Mirrors §5.1 of the design doc. Every adapter normalizes its raw rows into
|
|
76
|
+
these five types so the KG / RAG / analysis layers see a consistent shape.
|
|
77
|
+
|
|
78
|
+
| Type | Examples |
|
|
79
|
+
|--------|------------------------------------------------|
|
|
80
|
+
| Person | self / contact / merchant / ai-agent |
|
|
81
|
+
| Event | message / order / payment / visit / post / ai-message / ai-image-generation / ... |
|
|
82
|
+
| Place | home / restaurant / mom's place |
|
|
83
|
+
| Item | product / link / media / document |
|
|
84
|
+
| Topic | "mom's health" / "Python learning" / "AI conversation with DeepSeek" |
|
|
85
|
+
|
|
86
|
+
All entities share `BaseEntity` fields:
|
|
87
|
+
- `id` — UUID v7 (time-ordered)
|
|
88
|
+
- `source` — `{ adapter, adapterVersion, capturedAt, capturedBy, originalId? }`
|
|
89
|
+
- `ingestedAt` — ms timestamp
|
|
90
|
+
- `confidence` — 0..1, optional
|
|
91
|
+
- `extra` — schemaless bag for adapter-specific fields
|
|
92
|
+
|
|
93
|
+
## Usage
|
|
94
|
+
|
|
95
|
+
```js
|
|
96
|
+
const {
|
|
97
|
+
newId,
|
|
98
|
+
validate,
|
|
99
|
+
validatePerson,
|
|
100
|
+
validateBatch,
|
|
101
|
+
partitionBatch,
|
|
102
|
+
PERSON_SUBTYPES,
|
|
103
|
+
EVENT_SUBTYPES,
|
|
104
|
+
} = require("@chainlesschain/personal-data-hub");
|
|
105
|
+
|
|
106
|
+
const person = {
|
|
107
|
+
id: newId(),
|
|
108
|
+
type: "person",
|
|
109
|
+
subtype: PERSON_SUBTYPES.CONTACT,
|
|
110
|
+
names: ["妈妈", "陈某某"],
|
|
111
|
+
identifiers: { phone: ["138-0000-1111"] },
|
|
112
|
+
ingestedAt: Date.now(),
|
|
113
|
+
source: {
|
|
114
|
+
adapter: "wechat",
|
|
115
|
+
adapterVersion: "0.1.0",
|
|
116
|
+
capturedAt: Date.now(),
|
|
117
|
+
capturedBy: "sqlite",
|
|
118
|
+
originalId: "wxid_xyz",
|
|
119
|
+
},
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
const { valid, errors } = validatePerson(person);
|
|
123
|
+
// → { valid: true, errors: [] }
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Validators never throw
|
|
127
|
+
|
|
128
|
+
All validators return `{ valid: boolean, errors: string[] }`. This lets the
|
|
129
|
+
adapter ingest pipeline collect every bad row in one pass and ship them to
|
|
130
|
+
a review queue instead of failing the whole sync window on the first corrupt
|
|
131
|
+
entry from a flaky third-party data source.
|
|
132
|
+
|
|
133
|
+
```js
|
|
134
|
+
const { partitionBatch } = require("@chainlesschain/personal-data-hub");
|
|
135
|
+
|
|
136
|
+
const { valid, invalid, invalidReasons } = partitionBatch(rawBatch);
|
|
137
|
+
// commit `valid` to vault, spool `invalid` to review queue
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## LocalVault quick demo
|
|
141
|
+
|
|
142
|
+
```js
|
|
143
|
+
const fs = require("fs"), os = require("os"), path = require("path");
|
|
144
|
+
const {
|
|
145
|
+
LocalVault, generateKeyHex, newId, emptyBatch,
|
|
146
|
+
PERSON_SUBTYPES, EVENT_SUBTYPES,
|
|
147
|
+
} = require("@chainlesschain/personal-data-hub");
|
|
148
|
+
|
|
149
|
+
const v = new LocalVault({
|
|
150
|
+
path: path.join(os.homedir(), ".chainlesschain", "hub.db"),
|
|
151
|
+
key: generateKeyHex(), // production: pull from a KeyProvider
|
|
152
|
+
});
|
|
153
|
+
v.open();
|
|
154
|
+
|
|
155
|
+
const now = Date.now();
|
|
156
|
+
const mom = {
|
|
157
|
+
id: newId(), type: "person", subtype: PERSON_SUBTYPES.CONTACT,
|
|
158
|
+
names: ["妈妈"], identifiers: { phone: ["13800001111"] },
|
|
159
|
+
ingestedAt: now,
|
|
160
|
+
source: { adapter: "demo", adapterVersion: "0.1.0", capturedAt: now, capturedBy: "manual" },
|
|
161
|
+
};
|
|
162
|
+
const order = {
|
|
163
|
+
id: newId(), type: "event", subtype: EVENT_SUBTYPES.ORDER,
|
|
164
|
+
occurredAt: now - 86400000,
|
|
165
|
+
actor: "person-self",
|
|
166
|
+
participants: [mom.id],
|
|
167
|
+
content: { title: "妈妈生日蛋白粉", amount: { value: 288.5, currency: "CNY", direction: "out" } },
|
|
168
|
+
ingestedAt: now,
|
|
169
|
+
source: { adapter: "demo", adapterVersion: "0.1.0", capturedAt: now, capturedBy: "manual", originalId: "ord-42" },
|
|
170
|
+
};
|
|
171
|
+
|
|
172
|
+
v.putBatch({ ...emptyBatch(), persons: [mom], events: [order] });
|
|
173
|
+
|
|
174
|
+
// Query
|
|
175
|
+
const orders = v.queryEvents({ subtype: "order" });
|
|
176
|
+
|
|
177
|
+
// Adapter dedup before ingest
|
|
178
|
+
const exists = v.findBySource("events", "demo", "ord-42");
|
|
179
|
+
|
|
180
|
+
// Sync watermark
|
|
181
|
+
v.setWatermark("demo", "INBOX", { watermark: "42", lastSyncedAt: now });
|
|
182
|
+
const wm = v.getWatermark("demo", "INBOX");
|
|
183
|
+
|
|
184
|
+
// Rotate the master key (WAL-safe — swaps journal mode transparently)
|
|
185
|
+
v.rotateKey(generateKeyHex());
|
|
186
|
+
|
|
187
|
+
v.close();
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Key providers
|
|
191
|
+
|
|
192
|
+
Production builds inject a platform-specific KeyProvider that talks to
|
|
193
|
+
DPAPI / Keychain / Android Keystore / iOS Keychain (and optionally wraps
|
|
194
|
+
the result in a U-Key/SIMKey hardware key). Implement this 4-method
|
|
195
|
+
contract:
|
|
196
|
+
|
|
197
|
+
```js
|
|
198
|
+
{
|
|
199
|
+
async get(name) // returns hex or null
|
|
200
|
+
async set(name, hexKey) // store hex (validate it's 64 hex chars first)
|
|
201
|
+
async del(name)
|
|
202
|
+
async has(name)
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
The package ships `InMemoryKeyProvider` (tests) and `FileKeyProvider`
|
|
207
|
+
(dev fallback, stores 0600-perm files on disk). Recommended key names:
|
|
208
|
+
|
|
209
|
+
- `vault:<vault-id>` master key for a vault
|
|
210
|
+
- `vault:<vault-id>:prev` retained pre-rotation key for emergency recovery
|
|
211
|
+
- `adapter:<name>:cookie` per-adapter blobs (used by later-phase adapters)
|
|
212
|
+
|
|
213
|
+
## Tests
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
cd packages/personal-data-hub
|
|
217
|
+
npm test
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
**268 tests** across 17 files covering ID generation, all 5 entity validators,
|
|
221
|
+
batch helpers, key providers, vault open/migrations, entity round-trips,
|
|
222
|
+
transactional putBatch with rollback, raw_events archive, queryEvents
|
|
223
|
+
filters + pagination, sync watermarks, audit log, key rotation (WAL-safe),
|
|
224
|
+
destroy, stats, adapter-spec assertion, KG triple derivation, RAG doc
|
|
225
|
+
derivation, MockAdapter deterministic behavior, full registry sync E2E
|
|
226
|
+
(including health-gating, mid-sync failure recovery, sink failure
|
|
227
|
+
tolerance), and the 1k events <30s ingest perf gate.
|
|
228
|
+
|
|
229
|
+
## Not in this package (yet)
|
|
230
|
+
|
|
231
|
+
| Concern | Lives in |
|
|
232
|
+
|-----------------------|---------------------------------------------------|
|
|
233
|
+
| Platform KeyProviders (DPAPI/Keychain/Keystore) | Phase 1.5 — desktop-app-vue main process bridge |
|
|
234
|
+
| AdapterRegistry       | Phase 2 — already landed in this package (`lib/registry.js`); listed here for history only |
|
|
235
|
+
| KG ingestor / RAG | Phase 2/4 — wired into existing KG / RAG engines |
|
|
236
|
+
| Email/Alipay/AI/WeChat adapters | Phase 5-12 — separate sub-packages |
|
|
237
|
+
| AI analysis skills | Phase 11 — `skills/personal-analysis-*/` |
|
|
238
|
+
|
|
239
|
+
## License
|
|
240
|
+
|
|
241
|
+
MIT
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
|
|
5
|
+
const { assertAdapter, SENSITIVITY_LEVELS } = require("../lib/adapter-spec");
|
|
6
|
+
const { MockAdapter } = require("../lib/mock-adapter");
|
|
7
|
+
|
|
8
|
+
// Contract tests for assertAdapter(). Every case reads the `ok` flag (and,
// where relevant, the `errors` string array) from the returned result instead
// of expecting a throw. MockAdapter serves as the known-good baseline that
// each negative case mutates in exactly one place.
describe("assertAdapter", () => {
  it("accepts a fully-valid adapter (MockAdapter)", () => {
    const r = assertAdapter(new MockAdapter());
    expect(r.ok).toBe(true);
  });

  it("rejects non-object input", () => {
    expect(assertAdapter(null).ok).toBe(false);
    expect(assertAdapter(undefined).ok).toBe(false);
    expect(assertAdapter("oops").ok).toBe(false);
  });

  it("rejects missing required fields (collects all errors, no throw)", () => {
    const r = assertAdapter({});
    expect(r.ok).toBe(false);
    // Many fields missing — at least name + version + capabilities + dataDisclosure + methods.
    expect(r.errors.length).toBeGreaterThan(4);
    expect(r.errors.some((e) => e.includes("name"))).toBe(true);
    expect(r.errors.some((e) => e.includes("version"))).toBe(true);
    expect(r.errors.some((e) => e.includes("authenticate"))).toBe(true);
    expect(r.errors.some((e) => e.includes("sync"))).toBe(true);
  });

  it("rejects invalid sensitivity", () => {
    const a = new MockAdapter();
    // "extreme" is outside the low/medium/high list asserted at the end of this suite.
    a.dataDisclosure = { ...a.dataDisclosure, sensitivity: "extreme" };
    const r = assertAdapter(a);
    expect(r.ok).toBe(false);
    expect(r.errors.some((e) => e.includes("sensitivity"))).toBe(true);
  });

  it("rejects non-boolean legalGate", () => {
    const a = new MockAdapter();
    a.dataDisclosure = { ...a.dataDisclosure, legalGate: "yes" };
    const r = assertAdapter(a);
    expect(r.ok).toBe(false);
  });

  it("rejects non-array capabilities", () => {
    const a = new MockAdapter();
    a.capabilities = "sync";
    const r = assertAdapter(a);
    expect(r.ok).toBe(false);
  });

  it("rejects rateLimits with negative value", () => {
    const a = new MockAdapter();
    a.rateLimits = { perMinute: -1 };
    expect(assertAdapter(a).ok).toBe(false);
  });

  it("accepts adapter without rateLimits (optional field)", () => {
    const a = new MockAdapter();
    delete a.rateLimits;
    expect(assertAdapter(a).ok).toBe(true);
  });

  it("rejects non-function authenticate / sync / normalize / healthCheck", () => {
    // Exercise every method named in the title. Previously only `authenticate`
    // and `normalize` were checked, so a regression in the `sync` or
    // `healthCheck` validation would have gone unnoticed.
    // NOTE(review): assumes assertAdapter type-checks `sync` and `healthCheck`
    // the same way as the other two, as the title states — confirm against
    // lib/adapter-spec.js.
    const cases = [
      ["authenticate", "not a function"],
      ["sync", "not a function"],
      ["normalize", 42],
      ["healthCheck", 42],
    ];
    for (const [method, bogus] of cases) {
      // Fresh adapter per case so exactly one method is broken at a time.
      const a = new MockAdapter();
      a[method] = bogus;
      expect(assertAdapter(a).ok).toBe(false);
    }
  });

  it("SENSITIVITY_LEVELS lists low/medium/high", () => {
    expect(SENSITIVITY_LEVELS).toEqual(["low", "medium", "high"]);
  });
});
|