@pdpp/read-core 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +35 -0
  2. package/package.json +38 -0
  3. package/src/index.js +443 -0
package/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # @pdpp/read-core
2
+
3
+ `@pdpp/read-core` contains pure, adapter-agnostic primitives for shaping PDPP
4
+ read results into bounded previews, continuation descriptors, handle metadata,
5
+ and deliberate escalation paths.
6
+
7
+ It is the shared core consumed by independently-installed read adapters
8
+ (`@pdpp/mcp-server`, `@pdpp/cli`, and future REST/SDK adapters). Given records,
9
+ record-sets, or field reads, it returns structured, bounded descriptors that
10
+ any adapter can render — the adapter decides how to present them.
11
+
12
+ ## Boundary (what belongs here, and what does not)
13
+
14
+ **In scope** — pure read shaping only:
15
+
16
+ - Bounded previews of records and record-sets.
17
+ - Content ladders and field-window evidence (progressive disclosure).
18
+ - Continuation descriptors and opaque content handles (encode/decode).
19
+ - Binary/blob field metadata.
20
+ - Deterministic truncation and stable inline JSON.
21
+ - Stable record identity for evidence.
22
+
23
+ **Out of scope** — keep these in the adapter or server layers:
24
+
25
+ - No authorization, grants, or token semantics.
26
+ - No HTTP, transport, or networking.
27
+ - No filesystem or export I/O.
28
+ - No CLI parsing or flag handling.
29
+ - No UI rendering beyond minimal stable descriptors.
30
+ - No connector-specific or source-specific business semantics.
31
+ - No single-client quirks baked in as core semantics (client workarounds
32
+ belong in the adapter that needs them).
33
+
34
+ The package keeps a deliberately small, stable public API so adapters can build
35
+ against it safely.
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "@pdpp/read-core",
3
+ "version": "0.0.0",
4
+ "type": "module",
5
+ "description": "Pure, adapter-agnostic primitives for shaping PDPP read results into bounded previews, continuation descriptors, handle metadata, and escalation paths.",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/vana-com/pdpp.git",
9
+ "directory": "packages/read-core"
10
+ },
11
+ "engines": {
12
+ "node": ">=22.14.0"
13
+ },
14
+ "exports": {
15
+ ".": "./src/index.js"
16
+ },
17
+ "files": [
18
+ "src/",
19
+ "README.md"
20
+ ],
21
+ "scripts": {
22
+ "test": "node --test test/*.test.js",
23
+ "verify": "pnpm test"
24
+ },
25
+ "keywords": [
26
+ "pdpp",
27
+ "personal-data",
28
+ "read",
29
+ "read-surface"
30
+ ],
31
+ "license": "Apache-2.0",
32
+ "publishConfig": {
33
+ "access": "public",
34
+ "provenance": false,
35
+ "registry": "https://registry.npmjs.org/",
36
+ "tag": "latest"
37
+ }
38
+ }
package/src/index.js ADDED
@@ -0,0 +1,443 @@
1
// --- Record-set preview defaults -------------------------------------------
// Maximum number of records rendered in a preview.
const DEFAULT_RECORD_PREVIEW_LIMIT = 5;
// Total character budget for a rendered preview.
const DEFAULT_RECORD_PREVIEW_CHAR_LIMIT = 1792;
// Characters subtracted from the budget before record lines are laid out.
const DEFAULT_RECORD_PREVIEW_FOOTER_RESERVE = 96;
// Smallest per-record budget; when less remains, rendering stops early.
const DEFAULT_RECORD_PREVIEW_MIN_RECORD_CHARS = 24;
// Footer line appended when a multi-record preview was cut short.
const DEFAULT_RECORD_PREVIEW_TRUNCATED_MARKER =
  'record_preview_truncated=true; followup=rerun_limit; machine envelope in structuredContent.data';

// --- Content ladder defaults -----------------------------------------------
// Default `limit_chars` for a field window read.
const DEFAULT_FIELD_WINDOW_LIMIT_CHARS = 2048;
// Maximum number of text field windows listed per record.
const DEFAULT_FIELD_WINDOW_LIMIT = 8;
// Maximum number of binary field descriptors listed per record.
const DEFAULT_BINARY_FIELD_LIMIT = 8;

// Keys that are never reported as content fields or binary fields.
const OMIT_FIELD_KEYS = new Set(
  [
    'id',
    'record_id',
    'recordId',
    'connection_id',
    'connector_instance_id',
    'connector_key',
    'stream',
    'metadata',
    '_meta',
  ]
);
22
+
23
/**
 * Serialize a value as single-line JSON.
 *
 * `JSON.stringify` yields `undefined` (not a string) for `undefined`,
 * functions, and symbols. Callers measure and truncate the result, so those
 * inputs are rendered as `'null'` — the same way JSON renders them inside an
 * array — to guarantee a string is always returned.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} Compact JSON text.
 * @throws {TypeError} If the value contains a cycle or a BigInt.
 */
export function stableInlineJson(value) {
  return JSON.stringify(value) ?? 'null';
}
26
+
27
/**
 * Truncate text to at most `limit` UTF-16 code units, ending with an ellipsis
 * when anything was cut.
 *
 * @param {unknown} value - Value to render; nullish becomes the empty string.
 * @param {number} limit - Maximum length of the result in UTF-16 code units.
 * @returns {string} The original text if it fits, otherwise a shortened prefix
 *   followed by `…`. Returns `''` for a non-finite or non-positive limit.
 */
export function truncateText(value, limit) {
  const text = String(value ?? '');
  if (!Number.isFinite(limit) || limit <= 0) return '';
  if (text.length <= limit) return text;
  if (limit <= 1) return '…'.slice(0, limit);

  // Reserve one code unit for the ellipsis.
  let end = Math.trunc(limit) - 1;
  if (end > 0) {
    // Never cut between the halves of a surrogate pair: a dangling high
    // surrogate is not valid text and is mangled when encoded as UTF-8.
    const lastUnit = text.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  return `${text.slice(0, end)}…`;
}
34
+
35
/**
 * Locate the record array inside a response body.
 *
 * Checked in order: the body itself, `body.records`, `body.data`, and
 * `body.data.records`. The first one that is an array is returned as-is.
 *
 * @param {unknown} body - Response body or bare record array.
 * @returns {unknown[]} The record array, or a new empty array if none is found.
 */
export function extractRecordRows(body) {
  const candidates = [body, body?.records, body?.data, body?.data?.records];
  const rows = candidates.find((candidate) => Array.isArray(candidate));
  return rows ?? [];
}
42
+
43
/**
 * Render a bounded, line-oriented text preview of a record-set response.
 *
 * The first line is a header with the record count and envelope handles. Each
 * following line is `record[i] <inline JSON>`, truncated to the remaining
 * character budget. A footer line is appended when the preview was cut short
 * or when more records exist than `recordLimit`.
 *
 * @param {unknown} body - Response body; records are found via `extractRecordRows`.
 * @param {string} label - Prefix for the header line.
 * @param {object} [options]
 * @param {number} [options.recordLimit] - Maximum records to render.
 * @param {number} [options.charLimit] - Total character budget.
 * @param {number} [options.footerReserve] - Characters held back from the budget.
 * @param {number} [options.minRecordChars] - Minimum budget required to render another record.
 * @param {string} [options.truncatedMarker] - Footer used when the preview was truncated.
 * @returns {string} The preview, lines joined with `\n`.
 */
export function summarizeRecordEvidence(body, label, options = {}) {
  const maxRecords = options.recordLimit ?? DEFAULT_RECORD_PREVIEW_LIMIT;
  const maxChars = options.charLimit ?? DEFAULT_RECORD_PREVIEW_CHAR_LIMIT;
  const reserve = options.footerReserve ?? DEFAULT_RECORD_PREVIEW_FOOTER_RESERVE;
  const minChars = options.minRecordChars ?? DEFAULT_RECORD_PREVIEW_MIN_RECORD_CHARS;
  const marker = options.truncatedMarker ?? DEFAULT_RECORD_PREVIEW_TRUNCATED_MARKER;

  const rows = extractRecordRows(body);
  const moreFlag = envelopeField(body, 'has_more') === true ? ' has_more=true.' : '';
  const handleText = formatEnvelopeHandles(body);

  if (rows.length === 0) {
    return `${label}: 0 record(s).${handleText}`;
  }

  const shownCount = Math.min(rows.length, maxRecords);
  const header = `${label}: ${rows.length} record(s).${moreFlag}${handleText} Showing up to ${shownCount}:`;
  const output = [header];
  const ceiling = maxChars - reserve;
  let consumed = header.length;
  let wasTruncated = false;

  const candidates = rows.slice(0, maxRecords);
  for (let position = 0; position < candidates.length; position += 1) {
    const tag = `record[${position}] `;
    // The extra 1 accounts for the newline that will join this line to the rest.
    const remaining = ceiling - consumed - tag.length - 1;
    if (remaining < minChars) {
      wasTruncated = true;
      break;
    }
    const json = stableInlineJson(sanitizeRecordForEvidence(candidates[position]));
    if (json.length > remaining) {
      wasTruncated = true;
    }
    const line = `${tag}${truncateText(json, remaining)}`;
    output.push(line);
    consumed += line.length + 1;
  }

  // The truncation marker is only emitted for multi-record sets; otherwise
  // report how many records lie beyond the record limit.
  if (wasTruncated && rows.length > 1) {
    output.push(marker);
  } else if (rows.length > maxRecords) {
    output.push(
      `more_records=${rows.length - maxRecords}; followup=rerun_cursor_or_limit; machine envelope in structuredContent.data`
    );
  }

  return output.join('\n');
}
86
+
87
/**
 * Render a field-window read as text: a headline with record identity, field
 * path, character range and completeness, then an optional cursor line, then
 * the window text. Empty sections are omitted.
 *
 * @param {unknown} body - Field-window response; reads `window`, `field`,
 *   `field_path`, `stream`, `record_id`/`recordId`, and
 *   `connection_id`/`connector_instance_id`.
 * @returns {string} Sections joined with `\n`.
 */
export function summarizeFieldWindowEvidence(body) {
  const win = objectValue(body?.window) ?? {};
  const startChars = numberValue(win.start_chars);
  const endChars = numberValue(win.end_chars);

  // An end offset is only reported together with a start offset.
  let rangeText = 'chars';
  if (startChars !== null) {
    rangeText = endChars !== null ? `chars ${startChars}..${endChars}` : `chars ${startChars}..`;
  }

  const identityParts = [
    firstString(body?.connection_id, body?.connector_instance_id),
    firstString(body?.stream),
    firstString(body?.record_id, body?.recordId),
  ];
  const recordIdentity = identityParts.filter(Boolean).join('/');
  const fieldName = firstString(body?.field?.path, body?.field_path, body?.field);

  const cursorParts = [];
  const forward = firstString(win.next_cursor);
  if (forward) {
    cursorParts.push(`next_cursor=${formatScalar(forward)}`);
  }
  const backward = firstString(win.previous_cursor);
  if (backward) {
    cursorParts.push(`previous_cursor=${formatScalar(backward)}`);
  }

  const isComplete = win.complete === true;
  const headline = `record=${recordIdentity || 'unknown'} field=${fieldName || 'unknown'} ${rangeText} complete=${isComplete}`;
  const sections = [headline, cursorParts.join(' '), String(win.text ?? '')];
  return sections.filter(Boolean).join('\n');
}
116
+
117
/**
 * Format the continuation handles carried by a response envelope.
 *
 * Emits `next_cursor=…`, `next_changes_since=…` and `count=kind:value`, in
 * that order, for each one that is present and truthy.
 *
 * @param {unknown} body - Response body.
 * @returns {string} `' a=… b=….'` (leading space, trailing period), or `''`
 *   when no handle is present.
 */
export function formatEnvelopeHandles(body) {
  const segments = [];

  for (const key of ['next_cursor', 'next_changes_since']) {
    const handle = envelopeField(body, key);
    if (handle) {
      segments.push(`${key}=${formatScalar(handle)}`);
    }
  }

  const countText = envelopeCount(body);
  if (countText) {
    segments.push(`count=${countText}`);
  }

  if (segments.length === 0) return '';
  return ` ${segments.join(' ')}.`;
}
127
+
128
/**
 * Build the content ladder for one record: its identity, a record URI, the
 * readable text field windows, and any binary field descriptors.
 *
 * @param {unknown} record - Record (or record wrapper) to describe.
 * @param {object} [options]
 * @param {object} [options.fallback] - Identity values used ahead of the record's own.
 * @param {(kind: string, payload: object) => string} [options.encodeResourceUri]
 *   - URI encoder; defaults to `defaultEncodeResourceUri`.
 * @param {number} [options.fieldLimit] - Maximum text field windows.
 * @param {number} [options.windowLimitChars] - `limit_chars` for each window.
 * @param {number} [options.binaryLimit] - Maximum binary field descriptors.
 * @returns {object|null} The ladder, or `null` when no stream/record id can be resolved.
 */
export function buildRecordContentLadder(record, options = {}) {
  const identity = recordContentIdentity(record, options.fallback);
  if (!identity) return null;

  const { id, connectionId, stream, recordId } = identity;
  const toUri = options.encodeResourceUri ?? defaultEncodeResourceUri;

  const windows = recordContentFields(record, identity, {
    encodeResourceUri: toUri,
    fieldLimit: options.fieldLimit ?? DEFAULT_FIELD_WINDOW_LIMIT,
    windowLimitChars: options.windowLimitChars ?? DEFAULT_FIELD_WINDOW_LIMIT_CHARS,
  });
  const binaries = recordContentBinaryFields(record, {
    binaryLimit: options.binaryLimit ?? DEFAULT_BINARY_FIELD_LIMIT,
  });

  const ladder = {
    id,
    connection_id: connectionId,
    stream,
    record_id: recordId,
    record_uri: toUri('record', {
      connection_id: connectionId,
      stream,
      record_id: recordId,
    }),
    field_windows: windows,
  };
  // `binary_fields` is only present when at least one binary field was found.
  if (binaries.length > 0) {
    ladder.binary_fields = binaries;
  }
  return ladder;
}
156
+
157
/**
 * Build content ladders for the records of a record-set response.
 *
 * Records whose identity cannot be resolved are dropped before the record
 * limit is applied.
 *
 * @param {unknown} body - Response body; records are found via `extractRecordRows`.
 * @param {object} [options] - Passed through to `buildRecordContentLadder`.
 * @param {number} [options.recordLimit] - Maximum ladders to return.
 * @param {string} [options.readTool] - Value reported as `read_tool`.
 * @returns {{kind: 'record_set', read_tool: string, records: object[]}|null}
 *   The record-set ladder, or `null` when no record produced a ladder.
 */
export function buildRecordSetContentLadder(body, options = {}) {
  const maxRecords = options.recordLimit ?? DEFAULT_RECORD_PREVIEW_LIMIT;
  const allLadders = extractRecordRows(body).map((row) => buildRecordContentLadder(row, options));
  const records = allLadders.filter((ladder) => Boolean(ladder)).slice(0, maxRecords);

  if (records.length === 0) {
    return null;
  }

  const readTool = options.readTool ?? 'read_record_field';
  return { kind: 'record_set', read_tool: readTool, records };
}
169
+
170
/**
 * Default resource URI encoder: `pdpp://<kind>/<content handle>`.
 *
 * @param {string} kind - Resource kind; also embedded in the handle.
 * @param {object} payload - Fields to embed in the handle.
 * @returns {string} The resource URI.
 */
export function defaultEncodeResourceUri(kind, payload) {
  const handle = encodeContentHandle(kind, payload);
  return `pdpp://${kind}/${handle}`;
}
173
+
174
/**
 * Encode a payload as an opaque content handle: base64url of a JSON object
 * carrying `v: 1`, the `kind`, and the payload fields.
 *
 * `v` and `kind` are reserved. A payload that happens to contain those keys
 * must not override the envelope, otherwise the handle would fail the
 * version/kind check in `decodeContentHandle`. Key order (and therefore the
 * encoded bytes) is unchanged for payloads without reserved keys.
 *
 * @param {string} kind - Handle kind.
 * @param {object} [payload] - Fields to embed in the handle.
 * @returns {string} The base64url-encoded handle.
 */
export function encodeContentHandle(kind, payload) {
  const envelope = { v: 1, kind, ...payload };
  // Re-assert the reserved keys after the spread; assigning to an existing key
  // keeps its original position, so serialization order stays v, kind, ...rest.
  envelope.v = 1;
  envelope.kind = kind;
  return base64UrlEncode(JSON.stringify(envelope));
}
183
+
184
/**
 * Decode an opaque content handle and check its version and kind.
 *
 * @param {unknown} handle - Handle text; coerced with `String()`.
 * @param {string} expectedKind - Kind the handle must carry.
 * @returns {object} The decoded payload, including `v` and `kind`.
 * @throws {Error} 'Content handle is malformed.' when the handle does not
 *   decode to a JSON object (the parse failure is attached as `cause`).
 * @throws {Error} 'Content handle has the wrong kind or version.' when `v` is
 *   not 1 or `kind` differs from `expectedKind`.
 */
export function decodeContentHandle(handle, expectedKind) {
  let payload;
  try {
    payload = JSON.parse(base64UrlDecode(String(handle)));
  } catch (cause) {
    // Report undecodable input the same way as a decoded non-object, instead
    // of leaking a raw JSON SyntaxError to the caller.
    throw new Error('Content handle is malformed.', { cause });
  }
  if (!payload || typeof payload !== 'object' || Array.isArray(payload)) {
    throw new Error('Content handle is malformed.');
  }
  if (payload.v !== 1 || payload.kind !== expectedKind) {
    throw new Error('Content handle has the wrong kind or version.');
  }
  return payload;
}
194
+
195
/**
 * Produce a preview-safe copy of a record: binary-looking fields are replaced
 * by metadata descriptors and nested values are sanitized recursively.
 *
 * A plain-object `data` field is sanitized key by key as the record payload.
 * The result is built with `Object.fromEntries` so every key — including an
 * own `__proto__` key, which `JSON.parse` can produce — becomes an ordinary
 * own property instead of hitting the prototype setter and vanishing from the
 * output.
 *
 * @param {unknown} record - Record to sanitize.
 * @returns {unknown} A sanitized shallow copy for plain objects; any other
 *   value (including arrays and null) is returned unchanged.
 */
export function sanitizeRecordForEvidence(record) {
  if (!record || typeof record !== 'object' || Array.isArray(record)) return record;

  return Object.fromEntries(
    Object.entries(record).map(([key, value]) => {
      if (key === 'data' && value && typeof value === 'object' && !Array.isArray(value)) {
        return [key, sanitizePayloadObject(value)];
      }
      return [key, binaryFieldMetadata(key, value) ?? sanitizeEvidenceValue(value)];
    })
  );
}
207
+
208
/**
 * Describe a field as binary when its value is a blob reference or a large
 * base64 string.
 *
 * @param {unknown} fieldPath - Field name; must be a non-empty string that is
 *   not in `OMIT_FIELD_KEYS`.
 * @param {unknown} value - Field value.
 * @returns {object|null} A binary-field descriptor, or `null` when the field
 *   is ineligible or does not look binary.
 */
export function binaryFieldMetadata(fieldPath, value) {
  const eligible =
    typeof fieldPath === 'string' && fieldPath.length > 0 && !OMIT_FIELD_KEYS.has(fieldPath);
  if (!eligible) return null;

  // Properties shared by both descriptor shapes.
  const common = {
    field_path: fieldPath,
    binary_field: true,
    text_like: false,
    preview_status: 'binary-only',
  };

  const blob = blobRefMetadata(value);
  if (blob) {
    return { ...common, ...blob };
  }

  if (isLargeBase64Field(fieldPath, value)) {
    return { ...common, encoding: 'base64', size_chars: value.length };
  }

  return null;
}
235
+
236
/**
 * Resolve a record's identity (connection, stream, record id).
 *
 * For each part, the first non-empty string wins among: the fallback, the
 * record's own fields, the payload's fields, and finally the parts parsed out
 * of a composite id.
 *
 * @param {unknown} record - Record or record wrapper.
 * @param {object} [fallback] - Preferred identity values.
 * @returns {{id: string, connectionId: string|null, stream: string, recordId: string}|null}
 *   The identity, or `null` when stream or record id is missing.
 */
function recordContentIdentity(record, fallback = {}) {
  const payload = objectValue(record?.data) || objectValue(record?.record) || objectValue(record);

  const compositeId = firstString(
    fallback?.id,
    record?.id,
    record?.result_id,
    record?.record_id,
    record?.recordId
  );
  const parsed = compositeId ? parseRecordResultId(compositeId) : null;

  const connectionId = firstString(
    fallback?.connectionId,
    fallback?.connection_id,
    record?.connection_id,
    record?.connector_instance_id,
    payload?.connection_id,
    parsed?.connectionId
  );
  const stream = firstString(
    fallback?.stream,
    record?.stream,
    record?.stream_name,
    payload?.stream,
    parsed?.stream
  );
  const recordId = firstString(
    fallback?.recordId,
    fallback?.record_id,
    record?.record_id,
    record?.recordId,
    payload?.id,
    payload?.record_id,
    parsed?.recordId
  );

  if (!stream || !recordId) {
    return null;
  }

  // The connection prefix is only present when a connection id was resolved.
  const prefix = connectionId ? `${connectionId}/` : '';
  return {
    id: `${prefix}${stream}:${recordId}`,
    connectionId: connectionId ?? null,
    stream,
    recordId,
  };
}
269
+
270
/**
 * List the readable text fields of a record as field-window descriptors.
 *
 * The payload is the first plain object among `record.data`, `record.record`
 * and the record itself. When none is an object (possible when the identity
 * was supplied entirely through a fallback), the payload is treated as empty
 * rather than passing `null` to `Object.entries`, which would throw.
 *
 * @param {unknown} record - Record or record wrapper.
 * @param {{id: string, connectionId: string|null, stream: string, recordId: string}} identity
 *   - Resolved record identity.
 * @param {object} options
 * @param {(kind: string, payload: object) => string} options.encodeResourceUri - URI encoder.
 * @param {number} options.fieldLimit - Maximum descriptors to return.
 * @param {number} options.windowLimitChars - `limit_chars` for each window.
 * @returns {object[]} One descriptor per content string field, in payload key order.
 */
function recordContentFields(record, identity, options) {
  const payload =
    objectValue(record?.data) || objectValue(record?.record) || objectValue(record) || {};
  return Object.entries(payload)
    .filter(([fieldPath, value]) => isContentStringField(fieldPath, value))
    .slice(0, options.fieldLimit)
    .map(([fieldPath, value]) => ({
      field_path: fieldPath,
      text_like: true,
      // 'truncated' means one window of `windowLimitChars` cannot hold the whole value.
      preview_status: value.length > options.windowLimitChars ? 'truncated' : 'complete',
      size_chars: value.length,
      read: {
        tool: 'read_record_field',
        args: {
          id: identity.id,
          field_path: fieldPath,
          offset_chars: 0,
          limit_chars: options.windowLimitChars,
        },
      },
      resource_uri: options.encodeResourceUri('field-window', {
        connection_id: identity.connectionId,
        stream: identity.stream,
        record_id: identity.recordId,
        field_path: fieldPath,
        offset_chars: 0,
        limit_chars: options.windowLimitChars,
      }),
    }));
}
299
+
300
/**
 * List binary field descriptors for a record's payload.
 *
 * The payload is the first plain object among `record.data`, `record.record`
 * and the record itself. A record with no object payload yields an empty list
 * instead of throwing from `Object.entries(null)`.
 *
 * @param {unknown} record - Record or record wrapper.
 * @param {object} options
 * @param {number} options.binaryLimit - Maximum descriptors to return.
 * @returns {object[]} Binary field descriptors, in payload key order.
 */
function recordContentBinaryFields(record, options) {
  const payload =
    objectValue(record?.data) || objectValue(record?.record) || objectValue(record) || {};
  return Object.entries(payload)
    .map(([fieldPath, value]) => binaryFieldMetadata(fieldPath, value))
    .filter(Boolean)
    .slice(0, options.binaryLimit);
}
307
+
308
/**
 * Sanitize every field of a payload object: binary-looking fields become
 * metadata descriptors, everything else is sanitized recursively.
 *
 * Built with `Object.fromEntries` so that an own `__proto__` key (which
 * `JSON.parse` can produce) is copied as a normal own property rather than
 * being routed to the prototype setter and silently dropped from the output.
 *
 * @param {object} payload - Plain object to sanitize.
 * @returns {object} A new object with the same keys.
 */
function sanitizePayloadObject(payload) {
  return Object.fromEntries(
    Object.entries(payload).map(([key, value]) => [
      key,
      binaryFieldMetadata(key, value) ?? sanitizeEvidenceValue(value),
    ])
  );
}
316
+
317
/**
 * Recursively sanitize one value for preview output.
 *
 * Arrays are sanitized element-wise, primitives and null pass through, blob
 * references collapse to a binary-only descriptor, and other objects are
 * sanitized field by field.
 *
 * @param {unknown} value - Value to sanitize.
 * @returns {unknown} The sanitized value.
 */
function sanitizeEvidenceValue(value) {
  if (Array.isArray(value)) {
    return value.map((entry) => sanitizeEvidenceValue(entry));
  }
  if (value === null || typeof value !== 'object') {
    return value;
  }

  const blob = blobRefMetadata(value);
  return blob
    ? { text_like: false, preview_status: 'binary-only', ...blob }
    : sanitizePayloadObject(value);
}
330
+
331
/**
 * Decide whether a payload field should be offered as a readable text window.
 *
 * The field name must be a non-empty string outside `OMIT_FIELD_KEYS` and free
 * of path-like characters (`/`, `\`, `..`). The value must be a non-empty
 * string that does not look like large base64 data.
 *
 * @param {unknown} fieldPath - Field name.
 * @param {unknown} value - Field value.
 * @returns {boolean} True when the field qualifies.
 */
function isContentStringField(fieldPath, value) {
  if (typeof fieldPath !== 'string' || fieldPath.length === 0) return false;
  if (OMIT_FIELD_KEYS.has(fieldPath)) return false;
  if (typeof value !== 'string' || value.length === 0) return false;
  if (isLargeBase64Field(fieldPath, value)) return false;
  if (fieldPath.includes('/') || fieldPath.includes('\\')) return false;
  if (fieldPath === '.' || fieldPath === '..') return false;
  return !fieldPath.includes('..');
}
346
+
347
/**
 * Heuristic check for a large base64 string.
 *
 * The value must be a string of at least 256 characters whose length is a
 * multiple of 4, that uses at least 4 distinct characters, and that consists
 * solely of the standard base64 alphabet with up to two `=` of padding.
 *
 * @param {unknown} fieldPath - Field name (not used by the check).
 * @param {unknown} value - Field value.
 * @returns {boolean} True when the value looks like base64 data.
 */
function isLargeBase64Field(fieldPath, value) {
  const longEnough = typeof value === 'string' && value.length >= 256;
  if (!longEnough || value.length % 4 !== 0) {
    return false;
  }
  // Rules out degenerate runs such as one repeated character.
  const distinctChars = new Set(value);
  if (distinctChars.size < 4) {
    return false;
  }
  return /^[A-Za-z0-9+/]+={0,2}$/.test(value);
}
353
+
354
/**
 * Extract blob-reference metadata from an object.
 *
 * Each property is read from several accepted spellings. The object counts as
 * a blob reference only when it yields a blob id, a fetch URL, or a MIME type.
 *
 * @param {unknown} value - Candidate blob reference.
 * @returns {object|null} An object with whichever of `blob_id`, `fetch_url`,
 *   `mime_type`, `digest`, `size_bytes` were found, or `null` when the value
 *   is not a blob reference.
 */
function blobRefMetadata(value) {
  const source = objectValue(value);
  if (!source) return null;

  const blobId = firstString(source.blob_id, source.blobId, source.id);
  const fetchUrl = firstString(source.fetch_url, source.fetchUrl, source.url, source.href);
  const mimeType = firstString(
    source.mime_type,
    source.mimeType,
    source.content_type,
    source.contentType
  );
  const digest = firstString(
    source.digest,
    source.sha256,
    source.content_digest,
    source.contentDigest
  );
  const sizeBytes = numberValue(
    source.size_bytes,
    source.sizeBytes,
    source.byte_length,
    source.byteLength
  );

  // A digest or size alone does not identify a blob reference.
  if (!(blobId || fetchUrl || mimeType)) return null;

  const meta = {};
  if (blobId) meta.blob_id = blobId;
  if (fetchUrl) meta.fetch_url = fetchUrl;
  if (mimeType) meta.mime_type = mimeType;
  if (digest) meta.digest = digest;
  if (sizeBytes !== null) meta.size_bytes = sizeBytes;
  return meta;
}
374
+
375
/**
 * Split a composite record id into its parts.
 *
 * Accepted forms are `connection/stream:recordId` and `stream:recordId`.
 * The connection form is tried first: the first `/` ends the connection id
 * and the first `:` after it ends the stream. If no `:` follows the first `/`,
 * the slash belongs to the record id (e.g. `messages:a/b`), so the id is
 * parsed as the connection-less form using the first `:` overall.
 *
 * The grammar is ambiguous when a connection-less record id contains both `/`
 * and a later `:`; such ids are still read as the connection form.
 *
 * @param {string} id - Composite id.
 * @returns {{connectionId: string|null, stream: string, recordId: string}|null}
 *   The parts, or `null` when the id has no usable `:` separator.
 */
function parseRecordResultId(id) {
  const slash = id.indexOf('/');
  const colonAfterSlash = id.indexOf(':', slash + 1);

  if (colonAfterSlash > 0) {
    if (slash > 0) {
      return {
        connectionId: id.slice(0, slash),
        stream: id.slice(slash + 1, colonAfterSlash),
        recordId: id.slice(colonAfterSlash + 1),
      };
    }
    return {
      connectionId: null,
      stream: id.slice(0, colonAfterSlash),
      recordId: id.slice(colonAfterSlash + 1),
    };
  }

  // No colon after the first slash: any colon present precedes it, which means
  // the slash is part of the record id in a `stream:recordId` id.
  const firstColon = id.indexOf(':');
  if (firstColon <= 0) return null;
  return {
    connectionId: null,
    stream: id.slice(0, firstColon),
    recordId: id.slice(firstColon + 1),
  };
}
392
+
393
/**
 * Read an envelope field from a response body.
 *
 * Looks for an own property named `key` on the body, then on `body.meta`,
 * then on `body.data` (only when `data` is a non-array object). The first
 * holder that owns the key wins, even if the stored value is `undefined`.
 *
 * @param {unknown} body - Response body.
 * @param {string} key - Field name.
 * @returns {unknown} The field value, or `undefined` when no holder owns the key.
 */
function envelopeField(body, key) {
  const data = body?.data;
  const dataHolder = data && typeof data === 'object' && !Array.isArray(data) ? data : null;

  for (const holder of [body, body?.meta, dataHolder]) {
    if (holder && Object.hasOwn(holder, key)) {
      return holder[key];
    }
  }
  return undefined;
}
401
+
402
/**
 * Format the envelope's `count` object as `kind:value`.
 *
 * @param {unknown} body - Response body.
 * @returns {string|null} `'<kind>:<value>'`, or `null` when `count` is not an
 *   object, has no non-empty string `kind`, or has no numeric `value`.
 */
function envelopeCount(body) {
  const raw = envelopeField(body, 'count');
  const isObjectLike = Boolean(raw) && typeof raw === 'object';
  if (!isObjectLike) return null;

  const kind = firstString(raw.kind);
  const value = numberValue(raw.value);
  return kind && value !== null ? `${kind}:${value}` : null;
}
410
+
411
/**
 * Return the value when it is a non-null, non-array object; otherwise `null`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object|null} The same object reference, or `null`.
 */
function objectValue(value) {
  if (!value) return null;
  if (typeof value !== 'object' || Array.isArray(value)) return null;
  return value;
}
414
+
415
/**
 * Return the first argument that is a non-empty string.
 *
 * @param {...unknown} values - Candidates, in priority order.
 * @returns {string|null} The first non-empty string, or `null` if there is none.
 */
function firstString(...values) {
  const match = values.find((candidate) => typeof candidate === 'string' && candidate.length > 0);
  return match ?? null;
}
421
+
422
/**
 * Return the first argument that yields a finite number.
 *
 * Finite numbers are returned as-is. Non-blank strings are parsed with
 * `Number.parseInt(…, 10)`, so a leading integer prefix is accepted and any
 * fractional part or trailing text is ignored.
 *
 * @param {...unknown} values - Candidates, in priority order.
 * @returns {number|null} The first usable number, or `null` if there is none.
 */
function numberValue(...values) {
  for (const candidate of values) {
    switch (typeof candidate) {
      case 'number':
        if (Number.isFinite(candidate)) return candidate;
        break;
      case 'string': {
        if (candidate.trim() === '') break;
        const parsed = Number.parseInt(candidate, 10);
        if (Number.isFinite(parsed)) return parsed;
        break;
      }
      default:
        break;
    }
  }
  return null;
}
432
+
433
/**
 * Render a handle value as JSON text, so strings appear quoted and escaped.
 *
 * @param {unknown} value - Value to render.
 * @returns {string|undefined} The result of `JSON.stringify(value)`.
 */
function formatScalar(value) {
  const rendered = JSON.stringify(value);
  return rendered;
}
436
+
437
/**
 * Encode text as unpadded base64url.
 *
 * @param {string} value - Text to encode; its UTF-8 bytes are encoded.
 * @returns {string} The base64url text.
 */
function base64UrlEncode(value) {
  const bytes = Buffer.from(value, 'utf8');
  return bytes.toString('base64url');
}
440
+
441
/**
 * Decode base64url text into a UTF-8 string.
 *
 * @param {string} value - The base64url text.
 * @returns {string} The decoded text.
 */
function base64UrlDecode(value) {
  const bytes = Buffer.from(value, 'base64url');
  return bytes.toString('utf8');
}