@chainlesschain/personal-data-hub 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +241 -0
  2. package/__tests__/adapter-spec.test.js +78 -0
  3. package/__tests__/adapters/email-adapter.test.js +605 -0
  4. package/__tests__/adapters/email-imap-session.test.js +334 -0
  5. package/__tests__/adapters/email-parser.test.js +244 -0
  6. package/__tests__/adapters/email-providers.test.js +84 -0
  7. package/__tests__/analysis.test.js +302 -0
  8. package/__tests__/batch.test.js +133 -0
  9. package/__tests__/bridges-cc-kg.test.js +231 -0
  10. package/__tests__/bridges-cc-llm.test.js +191 -0
  11. package/__tests__/bridges-cc-rag.test.js +162 -0
  12. package/__tests__/ids.test.js +45 -0
  13. package/__tests__/key-providers.test.js +126 -0
  14. package/__tests__/kg-derive.test.js +219 -0
  15. package/__tests__/llm-client.test.js +122 -0
  16. package/__tests__/mock-adapter.test.js +93 -0
  17. package/__tests__/prompt-builder.test.js +204 -0
  18. package/__tests__/query-parser.test.js +150 -0
  19. package/__tests__/rag-derive.test.js +169 -0
  20. package/__tests__/registry.test.js +304 -0
  21. package/__tests__/schemas.test.js +331 -0
  22. package/__tests__/vault.test.js +506 -0
  23. package/lib/adapter-spec.js +155 -0
  24. package/lib/adapters/email-imap/email-adapter.js +398 -0
  25. package/lib/adapters/email-imap/email-parser.js +177 -0
  26. package/lib/adapters/email-imap/imap-session.js +294 -0
  27. package/lib/adapters/email-imap/index.js +26 -0
  28. package/lib/adapters/email-imap/providers.js +111 -0
  29. package/lib/analysis.js +226 -0
  30. package/lib/batch.js +123 -0
  31. package/lib/bridges/cc-kg-sink.js +264 -0
  32. package/lib/bridges/cc-llm-adapter.js +169 -0
  33. package/lib/bridges/cc-rag-sink.js +118 -0
  34. package/lib/bridges/index.js +44 -0
  35. package/lib/constants.js +92 -0
  36. package/lib/ids.js +103 -0
  37. package/lib/index.js +141 -0
  38. package/lib/key-providers.js +146 -0
  39. package/lib/kg-derive.js +214 -0
  40. package/lib/llm-client.js +171 -0
  41. package/lib/migrations.js +246 -0
  42. package/lib/mock-adapter.js +199 -0
  43. package/lib/prompt-builder.js +205 -0
  44. package/lib/query-parser.js +250 -0
  45. package/lib/rag-derive.js +186 -0
  46. package/lib/registry.js +398 -0
  47. package/lib/schemas.js +379 -0
  48. package/lib/vault.js +883 -0
  49. package/package.json +63 -0
  50. package/vitest.config.js +10 -0
package/README.md ADDED
@@ -0,0 +1,241 @@
1
+ # @chainlesschain/personal-data-hub
2
+
3
+ Personal Data Hub — UnifiedSchema, validators, batch helpers, SQLCipher
4
+ LocalVault, and AdapterRegistry for the "data back to the individual"
5
+ middleware.
6
+
7
+ > **Phases 0, 1, 2, 3, and 3.5 have landed** out of the 13-phase plan in
8
+ > [`docs/design/Personal_Data_Hub_Architecture.md`](../../docs/design/Personal_Data_Hub_Architecture.md).
9
+ > Phase 0 covers schema + validation + ID generation.
10
+ > Phase 1 adds SQLCipher LocalVault + pluggable key providers + migrations.
11
+ > Phase 2 adds AdapterRegistry + KG/RAG derivation + MockAdapter (1000
12
+ > events ingest in ~600ms — 50× under the 30s target).
13
+ > Phase 3 adds the natural-language AnalysisEngine: query parser → vault
14
+ > facts → prompt builder → LLM → citation validation, with a privacy gate
15
+ > that refuses non-local LLMs unless the caller opts in. Ships **MockLLMClient**
16
+ > for tests and **OllamaClient** for standalone use.
17
+ > Phase 3.5 wires production bridges: **CcLLMAdapter** wraps the existing
18
+ > cc llm-manager (Ollama / Volcengine / Anthropic / Gemini / DeepSeek)
19
+ > via dependency injection; **CcKgSink** translates hub triples into the
20
+ > existing knowledge-graph addEntity + addRelation; **CcRagSink** feeds
21
+ > hub RagDocs into BM25 (Qdrant vector store wiring left as future work).
22
+ > Hub package stays decoupled — bridges take cc functions as constructor
23
+ > args rather than importing cc modules directly.
24
+ > Sync engine UI, real KG/RAG wiring, and the actual adapters (Email,
25
+ > Alipay, AI Chat × 8, WeChat, ...) come in later phases.
26
+
27
+ ## What's in here
28
+
29
+ ```
30
+ lib/
31
+ ├── constants.js enum values (entity types, subtypes, capturedBy, ...)
32
+ ├── ids.js UUID v7 (hand-rolled RFC 9562, ~30 LOC, no dep)
33
+ ├── schemas.js per-entity validators (Person/Event/Place/Item/Topic)
34
+ ├── batch.js NormalizedBatch helpers (empty/merge/validate/partition)
35
+ ├── migrations.js LocalVault schema (events/persons/places/items/topics
36
+ │ /sync_watermarks/audit_log/raw_events) + versioning
37
+ ├── key-providers.js InMemoryKeyProvider + FileKeyProvider + KeyProvider
38
+ │ contract for platform Keystore impls in later phases
39
+ ├── vault.js LocalVault — SQLCipher AES-256, transactional putBatch,
40
+ │ typed put/get, queryEvents, watermarks, audit, key
41
+ │ rotation (WAL-safe), destroy
42
+ ├── adapter-spec.js PersonalDataAdapter contract + assertAdapter check
43
+ ├── kg-derive.js UnifiedSchema → KG triples (rdf:type / by / involves /
44
+ │ happened-at / etc.) — engine-agnostic
45
+ ├── rag-derive.js UnifiedSchema → RAG (text, metadata) docs for indexing
46
+ │ into BM25 + vector retrievers
47
+ ├── registry.js AdapterRegistry — register/list, syncAdapter with full
48
+ │ pipeline (health → sync → archive raw → normalize →
49
+ │ partition valid/invalid → vault → KG sink → RAG sink
50
+ │ → watermark → audit), syncAll, pluggable kgSink/ragSink
51
+ ├── mock-adapter.js reference impl + test fixture (deterministic seeded)
52
+ ├── query-parser.js heuristic time-window + filter + intent extraction
53
+ │ from natural-language questions
54
+ ├── prompt-builder.js fact summarization + system/user prompt construction
55
+ │ (system prompt is fact-free; facts go in user role as
56
+ │ marked-untrusted JSON) + citation parser + validator
57
+ ├── llm-client.js MockLLMClient (tests) + OllamaClient (default standalone)
58
+ │ conforming to the chat({messages}) → {text, usage}
59
+ │ contract. Production plugs in CcLLMAdapter wrapping
60
+ │ the existing desktop-app-vue llm-manager.
61
+ ├── analysis.js AnalysisEngine — orchestrates parseQuery → vault facts
62
+ │ (optional RAG augmentation) → buildPrompt → llm.chat →
63
+ │ parseCitations → validateCitations → audit. Hard
64
+ │ privacy gate refuses non-local LLMs without opt-in.
65
+ ├── bridges/
66
+ │ ├── cc-llm-adapter.js wraps cc llm-manager.chat → LLMClient
67
+ │ ├── cc-kg-sink.js hub triples → cc addEntity + addRelation
68
+ │ ├── cc-rag-sink.js hub RagDocs → cc BM25 (+ optional vector)
69
+ │ └── index.js re-exports
70
+ └── index.js re-exports
71
+ ```
72
+
73
+ ## The 5 core entities
74
+
75
+ Mirrors §5.1 of the design doc. Every adapter normalizes its raw rows into
76
+ these five types so the KG / RAG / analysis layers see a consistent shape.
77
+
78
+ | Type | Examples |
79
+ |--------|------------------------------------------------|
80
+ | Person | self / contact / merchant / ai-agent |
81
+ | Event | message / order / payment / visit / post / ai-message / ai-image-generation / ... |
82
+ | Place | home / restaurant / mom's place |
83
+ | Item | product / link / media / document |
84
+ | Topic | "mom's health" / "Python learning" / "AI conversation with DeepSeek" |
85
+
86
+ All entities share `BaseEntity` fields:
87
+ - `id` — UUID v7 (time-ordered)
88
+ - `source` — `{ adapter, adapterVersion, capturedAt, capturedBy, originalId? }`
89
+ - `ingestedAt` — ms timestamp
90
+ - `confidence` — 0..1, optional
91
+ - `extra` — schemaless bag for adapter-specific fields
92
+
93
+ ## Usage
94
+
95
+ ```js
96
+ const {
97
+ newId,
98
+ validate,
99
+ validatePerson,
100
+ validateBatch,
101
+ partitionBatch,
102
+ PERSON_SUBTYPES,
103
+ EVENT_SUBTYPES,
104
+ } = require("@chainlesschain/personal-data-hub");
105
+
106
+ const person = {
107
+ id: newId(),
108
+ type: "person",
109
+ subtype: PERSON_SUBTYPES.CONTACT,
110
+ names: ["妈妈", "陈某某"],
111
+ identifiers: { phone: ["138-0000-1111"] },
112
+ ingestedAt: Date.now(),
113
+ source: {
114
+ adapter: "wechat",
115
+ adapterVersion: "0.1.0",
116
+ capturedAt: Date.now(),
117
+ capturedBy: "sqlite",
118
+ originalId: "wxid_xyz",
119
+ },
120
+ };
121
+
122
+ const { valid, errors } = validatePerson(person);
123
+ // → { valid: true, errors: [] }
124
+ ```
125
+
126
+ ## Validators never throw
127
+
128
+ All validators return `{ valid: boolean, errors: string[] }`. This lets the
129
+ adapter ingest pipeline collect all bad rows in one pass and ship them to
130
+ a review queue instead of failing the whole sync window on the first corrupt
131
+ entry from a flaky third-party data source.
132
+
133
+ ```js
134
+ const { partitionBatch } = require("@chainlesschain/personal-data-hub");
135
+
136
+ const { valid, invalid, invalidReasons } = partitionBatch(rawBatch);
137
+ // commit `valid` to vault, spool `invalid` to review queue
138
+ ```
139
+
140
+ ## LocalVault quick demo
141
+
142
+ ```js
143
+ const fs = require("fs"), os = require("os"), path = require("path");
144
+ const {
145
+ LocalVault, generateKeyHex, newId, emptyBatch,
146
+ PERSON_SUBTYPES, EVENT_SUBTYPES,
147
+ } = require("@chainlesschain/personal-data-hub");
148
+
149
+ const v = new LocalVault({
150
+ path: path.join(os.homedir(), ".chainlesschain", "hub.db"),
151
+ key: generateKeyHex(), // production: pull from a KeyProvider
152
+ });
153
+ v.open();
154
+
155
+ const now = Date.now();
156
+ const mom = {
157
+ id: newId(), type: "person", subtype: PERSON_SUBTYPES.CONTACT,
158
+ names: ["妈妈"], identifiers: { phone: ["13800001111"] },
159
+ ingestedAt: now,
160
+ source: { adapter: "demo", adapterVersion: "0.1.0", capturedAt: now, capturedBy: "manual" },
161
+ };
162
+ const order = {
163
+ id: newId(), type: "event", subtype: EVENT_SUBTYPES.ORDER,
164
+ occurredAt: now - 86400000,
165
+ actor: "person-self",
166
+ participants: [mom.id],
167
+ content: { title: "妈妈生日蛋白粉", amount: { value: 288.5, currency: "CNY", direction: "out" } },
168
+ ingestedAt: now,
169
+ source: { adapter: "demo", adapterVersion: "0.1.0", capturedAt: now, capturedBy: "manual", originalId: "ord-42" },
170
+ };
171
+
172
+ v.putBatch({ ...emptyBatch(), persons: [mom], events: [order] });
173
+
174
+ // Query
175
+ const orders = v.queryEvents({ subtype: "order" });
176
+
177
+ // Adapter dedup before ingest
178
+ const exists = v.findBySource("events", "demo", "ord-42");
179
+
180
+ // Sync watermark
181
+ v.setWatermark("demo", "INBOX", { watermark: "42", lastSyncedAt: now });
182
+ const wm = v.getWatermark("demo", "INBOX");
183
+
184
+ // Rotate the master key (WAL-safe — swaps journal mode transparently)
185
+ v.rotateKey(generateKeyHex());
186
+
187
+ v.close();
188
+ ```
189
+
190
+ ## Key providers
191
+
192
+ Production builds inject a platform-specific KeyProvider that talks to
193
+ DPAPI / Keychain / Android Keystore / iOS Keychain (and optionally wraps
194
+ the result in a U-Key/SIMKey hardware key). Implement this 4-method
195
+ contract:
196
+
197
+ ```js
198
+ {
199
+ async get(name) // returns hex or null
200
+ async set(name, hexKey) // store hex (validate it's 64 hex chars first)
201
+ async del(name)
202
+ async has(name)
203
+ }
204
+ ```
205
+
206
+ The package ships `InMemoryKeyProvider` (tests) and `FileKeyProvider`
207
+ (dev fallback, stores 0600-perm files on disk). Recommended key names:
208
+
209
+ - `vault:<vault-id>` — master key for a vault
210
+ - `vault:<vault-id>:prev` — retained pre-rotation key for emergency recovery
211
+ - `adapter:<name>:cookie` — per-adapter blobs (used by later-phase adapters)
212
+
213
+ ## Tests
214
+
215
+ ```bash
216
+ cd packages/personal-data-hub
217
+ npm test
218
+ ```
219
+
220
+ **268 tests** across 17 files covering ID generation, all 5 entity validators,
221
+ batch helpers, key providers, vault open/migrations, entity round-trips,
222
+ transactional putBatch with rollback, raw_events archive, queryEvents
223
+ filters + pagination, sync watermarks, audit log, key rotation (WAL-safe),
224
+ destroy, stats, adapter-spec assertion, KG triple derivation, RAG doc
225
+ derivation, MockAdapter deterministic behavior, full registry sync E2E
226
+ (including health-gating, mid-sync failure recovery, sink failure
227
+ tolerance), and the 1k events <30s ingest perf gate.
228
+
229
+ ## Not in this package (yet)
230
+
231
+ | Concern | Lives in |
232
+ |-----------------------|---------------------------------------------------|
233
+ | Platform KeyProviders (DPAPI/Keychain/Keystore) | Phase 1.5 — desktop-app-vue main process bridge |
234
+ | AdapterRegistry | Phase 2 — same package or sibling |
235
+ | KG ingestor / RAG | Phase 2/4 — wired into existing KG / RAG engines |
236
+ | Email/Alipay/AI/WeChat adapters | Phase 5-12 — separate sub-packages |
237
+ | AI analysis skills | Phase 11 — `skills/personal-analysis-*/` |
238
+
239
+ ## License
240
+
241
+ MIT
@@ -0,0 +1,78 @@
1
+ "use strict";
2
+
3
+ import { describe, it, expect } from "vitest";
4
+
5
+ const { assertAdapter, SENSITIVITY_LEVELS } = require("../lib/adapter-spec");
6
+ const { MockAdapter } = require("../lib/mock-adapter");
7
+
8
+ describe("assertAdapter", () => { // Contract checks for assertAdapter, plus the SENSITIVITY_LEVELS export.
9
+ it("accepts a fully-valid adapter (MockAdapter)", () => {
10
+ const r = assertAdapter(new MockAdapter());
11
+ expect(r.ok).toBe(true);
12
+ });
13
+
14
+ it("rejects non-object input", () => { // .ok is read straight off the return value, so a throw here would fail the test
15
+ expect(assertAdapter(null).ok).toBe(false);
16
+ expect(assertAdapter(undefined).ok).toBe(false);
17
+ expect(assertAdapter("oops").ok).toBe(false);
18
+ });
19
+
20
+ it("rejects missing required fields (collects all errors, no throw)", () => {
21
+ const r = assertAdapter({});
22
+ expect(r.ok).toBe(false);
23
+ // Many fields missing — at least name + version + capabilities + dataDisclosure + methods.
24
+ expect(r.errors.length).toBeGreaterThan(4);
25
+ expect(r.errors.some((e) => e.includes("name"))).toBe(true);
26
+ expect(r.errors.some((e) => e.includes("version"))).toBe(true);
27
+ expect(r.errors.some((e) => e.includes("authenticate"))).toBe(true);
28
+ expect(r.errors.some((e) => e.includes("sync"))).toBe(true);
29
+ });
30
+
31
+ it("rejects invalid sensitivity", () => {
32
+ const a = new MockAdapter();
33
+ a.dataDisclosure = { ...a.dataDisclosure, sensitivity: "extreme" }; // "extreme" is outside the low/medium/high list asserted at the end of this suite
34
+ const r = assertAdapter(a);
35
+ expect(r.ok).toBe(false);
36
+ expect(r.errors.some((e) => e.includes("sensitivity"))).toBe(true);
37
+ });
38
+
39
+ it("rejects non-boolean legalGate", () => {
40
+ const a = new MockAdapter();
41
+ a.dataDisclosure = { ...a.dataDisclosure, legalGate: "yes" };
42
+ const r = assertAdapter(a);
43
+ expect(r.ok).toBe(false);
44
+ });
45
+
46
+ it("rejects non-array capabilities", () => {
47
+ const a = new MockAdapter();
48
+ a.capabilities = "sync";
49
+ const r = assertAdapter(a);
50
+ expect(r.ok).toBe(false);
51
+ });
52
+
53
+ it("rejects rateLimits with negative value", () => {
54
+ const a = new MockAdapter();
55
+ a.rateLimits = { perMinute: -1 };
56
+ expect(assertAdapter(a).ok).toBe(false);
57
+ });
58
+
59
+ it("accepts adapter without rateLimits (optional field)", () => {
60
+ const a = new MockAdapter();
61
+ delete a.rateLimits; // removes the instance's own rateLimits property instead of assigning a bad value
62
+ expect(assertAdapter(a).ok).toBe(true);
63
+ });
64
+
65
+ it("rejects non-function authenticate / sync / normalize / healthCheck", () => { // NOTE(review): only authenticate and normalize are exercised below; sync and healthCheck are named in the title but not checked here
66
+ const a = new MockAdapter();
67
+ a.authenticate = "not a function";
68
+ expect(assertAdapter(a).ok).toBe(false);
69
+ +
70
+ const b = new MockAdapter();
71
+ b.normalize = 42;
72
+ expect(assertAdapter(b).ok).toBe(false);
73
+ });
74
+
75
+ it("SENSITIVITY_LEVELS lists low/medium/high", () => {
76
+ expect(SENSITIVITY_LEVELS).toEqual(["low", "medium", "high"]);
77
+ });
78
+ });