@chainlesschain/personal-data-hub 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +241 -0
- package/__tests__/adapter-spec.test.js +78 -0
- package/__tests__/adapters/email-adapter.test.js +605 -0
- package/__tests__/adapters/email-imap-session.test.js +334 -0
- package/__tests__/adapters/email-parser.test.js +244 -0
- package/__tests__/adapters/email-providers.test.js +84 -0
- package/__tests__/analysis.test.js +302 -0
- package/__tests__/batch.test.js +133 -0
- package/__tests__/bridges-cc-kg.test.js +231 -0
- package/__tests__/bridges-cc-llm.test.js +191 -0
- package/__tests__/bridges-cc-rag.test.js +162 -0
- package/__tests__/ids.test.js +45 -0
- package/__tests__/key-providers.test.js +126 -0
- package/__tests__/kg-derive.test.js +219 -0
- package/__tests__/llm-client.test.js +122 -0
- package/__tests__/mock-adapter.test.js +93 -0
- package/__tests__/prompt-builder.test.js +204 -0
- package/__tests__/query-parser.test.js +150 -0
- package/__tests__/rag-derive.test.js +169 -0
- package/__tests__/registry.test.js +304 -0
- package/__tests__/schemas.test.js +331 -0
- package/__tests__/vault.test.js +506 -0
- package/lib/adapter-spec.js +155 -0
- package/lib/adapters/email-imap/email-adapter.js +398 -0
- package/lib/adapters/email-imap/email-parser.js +177 -0
- package/lib/adapters/email-imap/imap-session.js +294 -0
- package/lib/adapters/email-imap/index.js +26 -0
- package/lib/adapters/email-imap/providers.js +111 -0
- package/lib/analysis.js +226 -0
- package/lib/batch.js +123 -0
- package/lib/bridges/cc-kg-sink.js +264 -0
- package/lib/bridges/cc-llm-adapter.js +169 -0
- package/lib/bridges/cc-rag-sink.js +118 -0
- package/lib/bridges/index.js +44 -0
- package/lib/constants.js +92 -0
- package/lib/ids.js +103 -0
- package/lib/index.js +141 -0
- package/lib/key-providers.js +146 -0
- package/lib/kg-derive.js +214 -0
- package/lib/llm-client.js +171 -0
- package/lib/migrations.js +246 -0
- package/lib/mock-adapter.js +199 -0
- package/lib/prompt-builder.js +205 -0
- package/lib/query-parser.js +250 -0
- package/lib/rag-derive.js +186 -0
- package/lib/registry.js +398 -0
- package/lib/schemas.js +379 -0
- package/lib/vault.js +883 -0
- package/package.json +63 -0
- package/vitest.config.js +10 -0
package/lib/schemas.js
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* UnifiedSchema entity validators
|
|
3
|
+
*
|
|
4
|
+
* Mirrors §5 of docs/design/Personal_Data_Hub_Architecture.md exactly.
|
|
5
|
+
*
|
|
6
|
+
* Design choices:
|
|
7
|
+
* - Pure JS (no zod / ajv) — keep prototype lean. Validators are ~150 LOC total.
|
|
8
|
+
* - Validators return { valid: boolean, errors: string[] } rather than throwing,
|
|
9
|
+
* so adapter pipelines can collect & report many bad rows in one pass.
|
|
10
|
+
 * - "extra" is intentionally schemaless (per design doc §5.2 "schemaless 兜底", i.e. a schemaless fallback).
|
|
11
|
+
* - Required fields are strict; optional fields are tolerated when undefined
|
|
12
|
+
* but rejected when present with wrong type.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
"use strict";
|
|
16
|
+
|
|
17
|
+
const {
|
|
18
|
+
ENTITY_TYPES,
|
|
19
|
+
PERSON_SUBTYPES,
|
|
20
|
+
EVENT_SUBTYPES,
|
|
21
|
+
ITEM_SUBTYPES,
|
|
22
|
+
CAPTURED_BY,
|
|
23
|
+
AMOUNT_DIRECTIONS,
|
|
24
|
+
} = require("./constants");
|
|
25
|
+
|
|
26
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
/**
 * Report whether a value is a primitive string.
 *
 * @param {*} v - Value to test.
 * @returns {boolean} True only when `typeof v` is "string"; String wrapper
 *   objects are therefore rejected.
 */
function isString(v) {
  const kind = typeof v;
  return kind === "string";
}
|
|
31
|
+
|
|
32
|
+
/**
 * Report whether a value is a primitive string containing at least one
 * UTF-16 code unit. Whitespace-only strings count as non-empty.
 *
 * @param {*} v - Value to test.
 * @returns {boolean} True for any primitive string other than "".
 */
function isNonEmptyString(v) {
  if (typeof v !== "string") {
    return false;
  }
  return v !== "";
}
|
|
35
|
+
|
|
36
|
+
/**
 * Report whether a value is a primitive number that is neither NaN nor
 * +/-Infinity.
 *
 * @param {*} v - Value to test.
 * @returns {boolean} True for finite primitive numbers only.
 */
function isFiniteNumber(v) {
  // Number.isFinite never coerces: any non-number argument yields false,
  // so no separate typeof guard is needed.
  return Number.isFinite(v);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Report whether a value is a primitive number with no fractional part.
 *
 * @param {*} v - Value to test.
 * @returns {boolean} True for finite integral numbers (including -0 and
 *   integers beyond the safe-integer range); false for everything else.
 */
function isInt(v) {
  if (typeof v !== "number" || !Number.isFinite(v)) {
    return false;
  }
  return Math.trunc(v) === v;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Report whether a value is a plain data object: an object literal, an
 * `Object.create(null)` dictionary, or the equivalent from another realm.
 *
 * Arrays, null, and exotic or class-based objects (Date, Map, RegExp, class
 * instances, ...) are rejected. Such values have no meaningful own fields to
 * validate, so accepting them would let them slip through every nested
 * "must be a plain object" check.
 *
 * @param {*} v - Value to test.
 * @returns {boolean} True when `v` is a non-array object whose prototype is
 *   either null or a root prototype (one whose own prototype is null).
 */
function isPlainObject(v) {
  if (v === null || typeof v !== "object" || Array.isArray(v)) {
    return false;
  }
  const proto = Object.getPrototypeOf(v);
  // Comparing against "a prototype whose prototype is null" instead of
  // Object.prototype keeps plain objects created in another realm valid.
  return proto === null || Object.getPrototypeOf(proto) === null;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Report whether a value is an array whose elements are all primitive
 * strings. An empty array qualifies.
 *
 * @param {*} v - Value to test.
 * @returns {boolean} True when `v` is an array with no non-string element.
 */
function isStringArray(v) {
  if (!Array.isArray(v)) {
    return false;
  }
  return !v.some((item) => typeof item !== "string");
}
|
|
51
|
+
|
|
52
|
+
/**
 * Report whether a value is one of the own enumerable property values of an
 * enum-like object.
 *
 * @param {*} v - Candidate value.
 * @param {Object} enumObj - Object whose values form the allowed set.
 * @returns {boolean} True when `v` matches one of the values (SameValueZero
 *   comparison, so no type coercion takes place).
 */
function isEnum(v, enumObj) {
  const allowed = new Set(Object.values(enumObj));
  return allowed.has(v);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Assertion-style collector: record `msg` when the check did NOT pass.
 *
 * Despite the name, the message is pushed when `cond` is falsy, so call
 * sites read as "this must hold, otherwise report msg".
 *
 * @param {string[]} errors - Accumulator that is appended to in place.
 * @param {*} cond - Result of the check; any truthy value means "ok".
 * @param {string} msg - Message to record when the check failed.
 * @returns {void}
 */
function pushIf(errors, cond, msg) {
  if (cond) {
    return;
  }
  errors.push(msg);
}
|
|
59
|
+
|
|
60
|
+
// ─── BaseEntity validation (shared) ──────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
/**
 * Validate the BaseEntity fields shared by every entity type and append one
 * message per violation to `errors`.
 *
 * Checks performed:
 *   - id:          required non-empty string
 *   - ingestedAt:  required positive integer (ms timestamp)
 *   - confidence:  optional; a finite number in [0,1] when present
 *   - source:      required plain object with adapter, adapterVersion,
 *                  capturedAt, capturedBy and an optional string originalId
 *   - extra:       optional; a plain object when present (contents unchecked)
 *
 * @param {Object} entity - Entity to inspect; the callers in this file only
 *   pass values that already satisfied isPlainObject.
 * @param {string[]} errors - Accumulator that is appended to in place.
 * @param {string} [ctx=""] - Optional path prefix; when non-empty every
 *   message is prefixed with `ctx + "."`.
 * @returns {void}
 */
function validateBase(entity, errors, ctx = "") {
  const p = ctx ? ctx + "." : "";

  pushIf(errors, isNonEmptyString(entity.id), `${p}id must be a non-empty string`);

  // ingestedAt: ms timestamp (int)
  pushIf(
    errors,
    isInt(entity.ingestedAt) && entity.ingestedAt > 0,
    `${p}ingestedAt must be a positive integer (ms timestamp)`
  );

  // confidence: optional but if present 0..1
  if (entity.confidence !== undefined) {
    pushIf(
      errors,
      isFiniteNumber(entity.confidence) && entity.confidence >= 0 && entity.confidence <= 1,
      `${p}confidence must be a number in [0,1] when present`
    );
  }

  // source: required object. A malformed source only suppresses the checks
  // on its own fields; it must not return early, otherwise the `extra` check
  // below is skipped and a row with several problems reports only one.
  if (isPlainObject(entity.source)) {
    const s = entity.source;
    pushIf(errors, isNonEmptyString(s.adapter), `${p}source.adapter must be a non-empty string`);
    pushIf(
      errors,
      isNonEmptyString(s.adapterVersion),
      `${p}source.adapterVersion must be a non-empty string`
    );
    pushIf(
      errors,
      isInt(s.capturedAt) && s.capturedAt > 0,
      `${p}source.capturedAt must be a positive integer ms timestamp`
    );
    pushIf(
      errors,
      isEnum(s.capturedBy, CAPTURED_BY),
      `${p}source.capturedBy must be one of ${Object.values(CAPTURED_BY).join("|")}`
    );
    if (s.originalId !== undefined) {
      pushIf(errors, isString(s.originalId), `${p}source.originalId must be a string when present`);
    }
  } else {
    errors.push(`${p}source must be a plain object`);
  }

  // extra: optional object; its contents are deliberately not inspected
  if (entity.extra !== undefined) {
    pushIf(errors, isPlainObject(entity.extra), `${p}extra must be a plain object when present`);
  }
}
|
|
114
|
+
|
|
115
|
+
// ─── Person ──────────────────────────────────────────────────────────────
|
|
116
|
+
|
|
117
|
+
/**
 * Validate a Person entity.
 *
 * Required: type === ENTITY_TYPES.PERSON, a subtype drawn from
 * PERSON_SUBTYPES, and `names` as a non-empty array of non-empty strings.
 * Optional (checked only when not undefined): `identifiers` (plain object
 * whose values are strings or string arrays; undefined values are skipped),
 * `relation` and `notes` (strings). The shared base fields are validated
 * last via validateBase.
 *
 * @param {*} p - Candidate entity.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true exactly when
 *   `errors` is empty. Never throws for ordinary data values.
 */
function validatePerson(p) {
  if (!isPlainObject(p)) {
    return { valid: false, errors: ["entity must be a plain object"] };
  }

  const problems = [];

  if (p.type !== ENTITY_TYPES.PERSON) {
    problems.push('type must be "person"');
  }
  if (!isEnum(p.subtype, PERSON_SUBTYPES)) {
    problems.push(`subtype must be one of ${Object.values(PERSON_SUBTYPES).join("|")}`);
  }

  const namesOk =
    Array.isArray(p.names) && p.names.length > 0 && p.names.every((name) => isNonEmptyString(name));
  if (!namesOk) {
    problems.push("names must be a non-empty array of non-empty strings");
  }

  const ids = p.identifiers;
  if (ids !== undefined) {
    if (isPlainObject(ids)) {
      for (const [key, value] of Object.entries(ids)) {
        if (value === undefined) {
          continue;
        }
        const listLike = Array.isArray(value);
        if (listLike && !value.every((entry) => isString(entry))) {
          problems.push(`identifiers.${key} must be string[] when array`);
        } else if (!listLike && !isString(value)) {
          problems.push(`identifiers.${key} must be string or string[]`);
        }
      }
    } else {
      problems.push("identifiers must be a plain object when present");
    }
  }

  // Optional free-text fields, reported in declaration order.
  const optionalText = [
    [p.relation, "relation must be a string when present"],
    [p.notes, "notes must be a string when present"],
  ];
  for (const [value, message] of optionalText) {
    if (value !== undefined && !isString(value)) {
      problems.push(message);
    }
  }

  validateBase(p, problems);
  return { valid: problems.length === 0, errors: problems };
}
|
|
158
|
+
|
|
159
|
+
// ─── Event ───────────────────────────────────────────────────────────────
|
|
160
|
+
|
|
161
|
+
/**
 * Validate an Event entity.
 *
 * Required: type === ENTITY_TYPES.EVENT, a subtype drawn from
 * EVENT_SUBTYPES, `occurredAt` as a positive integer, and `content` as a
 * plain object. Optional top-level fields (checked only when not undefined):
 * durationMs (non-negative integer), actor and place (strings),
 * participants, items and topics (string arrays). Inside `content`, text and
 * title must be strings, mediaRefs a string array, and amount a plain object
 * with a finite `value`, a non-empty `currency` and a `direction` drawn from
 * AMOUNT_DIRECTIONS. The shared base fields are validated last via
 * validateBase.
 *
 * @param {*} e - Candidate entity.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true exactly when
 *   `errors` is empty.
 */
function validateEvent(e) {
  if (!isPlainObject(e)) {
    return { valid: false, errors: ["entity must be a plain object"] };
  }

  const problems = [];

  if (e.type !== ENTITY_TYPES.EVENT) {
    problems.push('type must be "event"');
  }
  if (!isEnum(e.subtype, EVENT_SUBTYPES)) {
    problems.push(`subtype must be one of ${Object.values(EVENT_SUBTYPES).join("|")}`);
  }
  if (!(isInt(e.occurredAt) && e.occurredAt > 0)) {
    problems.push("occurredAt must be a positive integer ms timestamp");
  }

  // Optional top-level fields: [field, acceptance test, message], evaluated
  // in this order so messages come out in a stable sequence.
  const optionalRules = [
    ["durationMs", (value) => isInt(value) && value >= 0, "durationMs must be a non-negative integer when present"],
    ["actor", (value) => isString(value), "actor must be a string when present"],
    ["participants", (value) => isStringArray(value), "participants must be string[] when present"],
    ["place", (value) => isString(value), "place must be a string when present"],
    ["items", (value) => isStringArray(value), "items must be string[] when present"],
    ["topics", (value) => isStringArray(value), "topics must be string[] when present"],
  ];
  for (const [field, accepts, message] of optionalRules) {
    const value = e[field];
    if (value !== undefined && !accepts(value)) {
      problems.push(message);
    }
  }

  const body = e.content;
  if (isPlainObject(body)) {
    if (body.text !== undefined && !isString(body.text)) {
      problems.push("content.text must be string");
    }
    if (body.title !== undefined && !isString(body.title)) {
      problems.push("content.title must be string");
    }
    if (body.mediaRefs !== undefined && !isStringArray(body.mediaRefs)) {
      problems.push("content.mediaRefs must be string[]");
    }

    const money = body.amount;
    if (money !== undefined) {
      if (isPlainObject(money)) {
        if (!isFiniteNumber(money.value)) {
          problems.push("content.amount.value must be a finite number");
        }
        if (!isNonEmptyString(money.currency)) {
          problems.push("content.amount.currency must be a non-empty string");
        }
        if (!isEnum(money.direction, AMOUNT_DIRECTIONS)) {
          problems.push(
            `content.amount.direction must be one of ${Object.values(AMOUNT_DIRECTIONS).join("|")}`
          );
        }
      } else {
        problems.push("content.amount must be a plain object when present");
      }
    }
  } else {
    problems.push("content must be a plain object");
  }

  validateBase(e, problems);
  return { valid: problems.length === 0, errors: problems };
}
|
|
235
|
+
|
|
236
|
+
// ─── Place ───────────────────────────────────────────────────────────────
|
|
237
|
+
|
|
238
|
+
/**
 * Validate a Place entity.
 *
 * Required: type === ENTITY_TYPES.PLACE, a non-empty string `name`, and
 * `aliases` as an array of strings (which may be empty). Optional (checked
 * only when not undefined): `coordinates` (plain object with lat in
 * [-90,90] and lng in [-180,180]), `address` and `category` (strings). The
 * shared base fields are validated last via validateBase.
 *
 * @param {*} p - Candidate entity.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true exactly when
 *   `errors` is empty.
 */
function validatePlace(p) {
  if (!isPlainObject(p)) {
    return { valid: false, errors: ["entity must be a plain object"] };
  }

  const problems = [];
  const withinRange = (value, low, high) => isFiniteNumber(value) && value >= low && value <= high;

  if (p.type !== ENTITY_TYPES.PLACE) {
    problems.push('type must be "place"');
  }
  if (!isNonEmptyString(p.name)) {
    problems.push("name must be a non-empty string");
  }

  const coords = p.coordinates;
  if (coords !== undefined) {
    if (isPlainObject(coords)) {
      if (!withinRange(coords.lat, -90, 90)) {
        problems.push("coordinates.lat must be a number in [-90,90]");
      }
      if (!withinRange(coords.lng, -180, 180)) {
        problems.push("coordinates.lng must be a number in [-180,180]");
      }
    } else {
      problems.push("coordinates must be a plain object when present");
    }
  }

  if (p.address !== undefined && !isString(p.address)) {
    problems.push("address must be a string when present");
  }
  if (p.category !== undefined && !isString(p.category)) {
    problems.push("category must be a string when present");
  }

  // aliases is mandatory here (unlike the optional fields above), though it
  // may be an empty array.
  const aliasesOk = Array.isArray(p.aliases) && p.aliases.every((alias) => isString(alias));
  if (!aliasesOk) {
    problems.push("aliases must be a (possibly empty) array of strings");
  }

  validateBase(p, problems);
  return { valid: problems.length === 0, errors: problems };
}
|
|
279
|
+
|
|
280
|
+
// ─── Item ────────────────────────────────────────────────────────────────
|
|
281
|
+
|
|
282
|
+
/**
 * Validate an Item entity.
 *
 * Required: type === ENTITY_TYPES.ITEM, a subtype drawn from ITEM_SUBTYPES,
 * and a non-empty string `name`. Optional (checked only when not undefined):
 * `category`, `merchant`, `externalUrl`, `thumbnailLocalPath` (strings) and
 * `price` (plain object with a finite `value` and a non-empty `currency`).
 * The shared base fields are validated last via validateBase.
 *
 * @param {*} i - Candidate entity.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true exactly when
 *   `errors` is empty.
 */
function validateItem(i) {
  if (!isPlainObject(i)) {
    return { valid: false, errors: ["entity must be a plain object"] };
  }

  const problems = [];
  const requireOptionalString = (value, message) => {
    if (value !== undefined && !isString(value)) {
      problems.push(message);
    }
  };

  if (i.type !== ENTITY_TYPES.ITEM) {
    problems.push('type must be "item"');
  }
  if (!isEnum(i.subtype, ITEM_SUBTYPES)) {
    problems.push(`subtype must be one of ${Object.values(ITEM_SUBTYPES).join("|")}`);
  }
  if (!isNonEmptyString(i.name)) {
    problems.push("name must be a non-empty string");
  }

  requireOptionalString(i.category, "category must be a string when present");

  const cost = i.price;
  if (cost !== undefined) {
    if (isPlainObject(cost)) {
      if (!isFiniteNumber(cost.value)) {
        problems.push("price.value must be a finite number");
      }
      if (!isNonEmptyString(cost.currency)) {
        problems.push("price.currency must be a non-empty string");
      }
    } else {
      problems.push("price must be a plain object when present");
    }
  }

  requireOptionalString(i.merchant, "merchant must be a string when present");
  requireOptionalString(i.externalUrl, "externalUrl must be a string when present");
  requireOptionalString(i.thumbnailLocalPath, "thumbnailLocalPath must be a string when present");

  validateBase(i, problems);
  return { valid: problems.length === 0, errors: problems };
}
|
|
322
|
+
|
|
323
|
+
// ─── Topic ───────────────────────────────────────────────────────────────
|
|
324
|
+
|
|
325
|
+
/**
 * Validate a Topic entity.
 *
 * Required: type === ENTITY_TYPES.TOPIC and a non-empty string `name`.
 * Optional (checked only when not undefined): `parentTopic` (string) and
 * `derivedFromEvents` (string array). The shared base fields are validated
 * last via validateBase.
 *
 * @param {*} t - Candidate entity.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true exactly when
 *   `errors` is empty.
 */
function validateTopic(t) {
  if (!isPlainObject(t)) {
    return { valid: false, errors: ["entity must be a plain object"] };
  }

  const problems = [];

  if (t.type !== ENTITY_TYPES.TOPIC) {
    problems.push('type must be "topic"');
  }
  if (!isNonEmptyString(t.name)) {
    problems.push("name must be a non-empty string");
  }

  const parent = t.parentTopic;
  if (parent !== undefined && !isString(parent)) {
    problems.push("parentTopic must be a string when present");
  }

  const origins = t.derivedFromEvents;
  if (origins !== undefined && !isStringArray(origins)) {
    problems.push("derivedFromEvents must be string[] when present");
  }

  validateBase(t, problems);
  return { valid: problems.length === 0, errors: problems };
}
|
|
346
|
+
|
|
347
|
+
// ─── Generic dispatch ────────────────────────────────────────────────────
|
|
348
|
+
|
|
349
|
+
/**
 * Validate any entity by dispatching on its `type` discriminator to the
 * matching type-specific validator.
 *
 * @param {*} entity - Candidate entity.
 * @returns {{valid: boolean, errors: string[]}} The chosen validator's
 *   result, or a single-error result when `entity` is not a plain object or
 *   its `type` is not one of ENTITY_TYPES.
 */
function validate(entity) {
  if (!isPlainObject(entity)) {
    return { valid: false, errors: ["entity must be a plain object"] };
  }

  switch (entity.type) {
    case ENTITY_TYPES.PERSON:
      return validatePerson(entity);
    case ENTITY_TYPES.EVENT:
      return validateEvent(entity);
    case ENTITY_TYPES.PLACE:
      return validatePlace(entity);
    case ENTITY_TYPES.ITEM:
      return validateItem(entity);
    case ENTITY_TYPES.TOPIC:
      return validateTopic(entity);
    default: {
      // Interpolating the raw value in a template literal throws a TypeError
      // for Symbols and for objects without a usable toString. Validators
      // are expected to report problems, not throw, so stringify defensively.
      let shown;
      try {
        shown = String(entity.type);
      } catch (err) {
        shown = Object.prototype.toString.call(entity.type);
      }
      return {
        valid: false,
        errors: [`unknown entity type: ${shown} (expected one of ${Object.values(ENTITY_TYPES).join("|")})`],
      };
    }
  }
}
|
|
371
|
+
|
|
372
|
+
module.exports = {
|
|
373
|
+
validate,
|
|
374
|
+
validatePerson,
|
|
375
|
+
validateEvent,
|
|
376
|
+
validatePlace,
|
|
377
|
+
validateItem,
|
|
378
|
+
validateTopic,
|
|
379
|
+
};
|