@chainlesschain/personal-data-hub 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +58 -16
  2. package/__tests__/analysis.test.js +1 -1
  3. package/__tests__/longtail-adapters.test.js +67 -16
  4. package/__tests__/messaging-qq-snapshot.test.js +294 -0
  5. package/__tests__/shopping-pinduoduo-snapshot.test.js +302 -0
  6. package/__tests__/shopping-snapshot.test.js +438 -0
  7. package/__tests__/social-adapters.test.js +28 -3
  8. package/__tests__/social-douyin-snapshot.test.js +253 -0
  9. package/__tests__/social-kuaishou-snapshot.test.js +309 -0
  10. package/__tests__/social-toutiao-snapshot.test.js +314 -0
  11. package/__tests__/social-weibo-snapshot.test.js +234 -0
  12. package/__tests__/social-xiaohongshu-snapshot.test.js +232 -0
  13. package/__tests__/travel-maps-snapshot.test.js +426 -0
  14. package/__tests__/vault-driver-error.test.js +74 -0
  15. package/lib/adapters/messaging-qq/index.js +498 -92
  16. package/lib/adapters/shopping-jd/index.js +228 -25
  17. package/lib/adapters/shopping-meituan/index.js +222 -26
  18. package/lib/adapters/shopping-pinduoduo/index.js +275 -0
  19. package/lib/adapters/social-douyin/index.js +454 -63
  20. package/lib/adapters/social-kuaishou/index.js +379 -127
  21. package/lib/adapters/social-toutiao/index.js +400 -130
  22. package/lib/adapters/social-weibo/index.js +393 -95
  23. package/lib/adapters/social-xiaohongshu/index.js +389 -49
  24. package/lib/adapters/travel-baidu-map/index.js +286 -26
  25. package/lib/adapters/travel-tencent-map/index.js +414 -0
  26. package/lib/index.js +5 -1
  27. package/lib/vault.js +60 -8
  28. package/package.json +2 -1
@@ -1,96 +1,436 @@
1
1
  /**
2
- * Phase 13.4 — Xiaohongshu (小红书) adapter.
2
+ * §A8 v0.2 — Xiaohongshu (小红书) adapter, dual-mode (snapshot + sqlite).
3
3
  *
4
- * Per sjqz/parsers/lifestyle.py XiaohongshuParser. Tables:
5
- * - note / browse_history viewed notes
6
- * - liked_note / favourite collected notes
7
- * - search_history queries
4
+ * Mirror of social-weibo/index.js dual-mode pattern:
5
+ *
6
+ * 1. snapshot mode (opts.inputPath): in-APK Android cc reads a snapshot
7
+ * JSON produced by the phone's XhsLocalCollector (WebView + OkHttp +
8
+ * X-S signed requests). account.uid OPTIONAL at construction (snapshot
9
+ * file carries account).
10
+ *
11
+ * 2. sqlite mode (opts.dbPath, legacy): Phase 13.4 device-pull path —
12
+ * reads xhs Android app's SQLite (browse_history / liked_note /
13
+ * favourite / search_history). account.uid REQUIRED at sync time.
14
+ *
15
+ * Snapshot schema (mirrors XhsLocalCollector.SNAPSHOT_SCHEMA_VERSION):
16
+ *
17
+ * {
18
+ * "schemaVersion": 1,
19
+ * "snapshottedAt": <epoch-ms>,
20
+ * "account": { "uid": "5e8c..." (xhs user_id hex string),
21
+ * "numericUid": "1234" (Long hash for sentinel),
22
+ * "displayName": "alice" },
23
+ * "events": [
24
+ * { "kind": "note", "id": "note-<noteId>", "capturedAt": <ms>,
25
+ * "title": "...", "noteId": "...", "desc": "...", "type": "normal|video",
26
+ * "likedCount": N, "collectedCount": N, "commentCount": N },
27
+ * { "kind": "liked", "id": "liked-<noteId>", "capturedAt": <ms>,
28
+ * "title": "...", "noteId": "...", "authorNickname": "..." },
29
+ * { "kind": "follow", "id": "follow-<userId>", "capturedAt": <ms>,
30
+ * "userId": "...", "nickname": "...", "image": "..." }
31
+ * ]
32
+ * }
8
33
  */
9
34
 
10
35
  "use strict";
11
36
 
12
37
  const fs = require("node:fs");
13
38
  const { newId } = require("../../ids");
39
+ const {
40
+ ENTITY_TYPES,
41
+ PERSON_SUBTYPES,
42
+ EVENT_SUBTYPES,
43
+ CAPTURED_BY,
44
+ } = require("../../constants");
14
45
 
15
46
  const NAME = "social-xiaohongshu";
16
- const VERSION = "0.5.0";
47
+ const VERSION = "0.6.0";
48
+ const SNAPSHOT_SCHEMA_VERSION = 1;
49
+
50
+ const KIND_NOTE = "note";
51
+ const KIND_LIKED = "liked";
52
+ const KIND_FOLLOW = "follow";
53
+ // legacy sqlite-mode-only kinds (preserved for backward compat normalize path)
54
+ const KIND_HISTORY = "history";
55
+ const KIND_LIKE = "like";
56
+ const KIND_FAVOURITE = "favourite";
57
+ const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_NOTE, KIND_LIKED, KIND_FOLLOW]);
58
+
59
/**
 * Build the adapter-scoped original id for a raw event, in the form
 * `xiaohongshu:<kind>:<id>`.
 *
 * Non-blank strings are used verbatim; finite numbers and bigints are
 * stringified. Any other value (missing, blank / whitespace-only string, NaN,
 * Infinity, objects) gets a generated `unknown-<epoch-ms>-<random>` suffix.
 * Note that the generated suffix differs on every call, so events without a
 * usable id are unique per call rather than stable across syncs.
 *
 * @param {string} kind - Event kind, embedded as the middle segment.
 * @param {string|number|bigint|null|undefined} id - Upstream identifier.
 * @returns {string} The namespaced original id.
 */
function stableOriginalId(kind, id) {
  let usable = null;
  if (typeof id === "string") {
    // A whitespace-only id would make every such event share one originalId,
    // so it is treated the same as a missing id.
    if (id.trim().length > 0) usable = id;
  } else if (typeof id === "number") {
    if (Number.isFinite(id)) usable = String(id);
  } else if (typeof id === "bigint") {
    usable = String(id);
  }

  if (usable === null) {
    usable = `unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  }
  return `xiaohongshu:${kind}:${usable}`;
}
69
+
70
/**
 * Convert a loosely-typed timestamp into epoch milliseconds.
 *
 * - Finite numbers above 1e12 are taken as milliseconds; anything at or below
 *   that is taken as seconds and multiplied by 1000.
 * - Strings are trimmed first. Purely numeric strings (digits with an optional
 *   decimal fraction) follow the same seconds/milliseconds rule; every other
 *   string is handed to `Date.parse`.
 * - Everything else (null, undefined, NaN, Infinity, objects, blank strings,
 *   unparseable strings) yields `null`.
 *
 * @param {*} v - Raw timestamp value.
 * @returns {number|null} Epoch milliseconds, or null when `v` is unusable.
 */
function parseTime(v) {
  // Values up to 1e12 cannot be a recent epoch-ms timestamp, so they are
  // interpreted as epoch seconds.
  const toEpochMs = (n) => (n > 1e12 ? n : n * 1000);

  if (typeof v === "number") {
    return Number.isFinite(v) ? toEpochMs(v) : null;
  }
  if (typeof v !== "string") return null;

  const text = v.trim();
  if (text === "") return null;
  // Numeric strings are parsed numerically so padded or fractional epoch
  // values never reach the engine-dependent Date.parse fallback.
  if (/^\d+(\.\d+)?$/.test(text)) return toEpochMs(Number(text));

  const parsed = Date.parse(text);
  return Number.isFinite(parsed) ? parsed : null;
}
82
+
83
/**
 * Run a SELECT and return all rows, or `null` if preparing or executing the
 * statement throws. Callers use the null to fall through to an alternative
 * table name, so failures are deliberately not propagated.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all()`.
 * @param {string} sql - Query text.
 * @returns {Array|null} Result rows, or null on any error.
 */
function trySelect(db, sql) {
  let rows = null;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_e) {
    rows = null;
  }
  return rows;
}
17
86
 
18
87
  class XiaohongshuAdapter {
19
88
  constructor(opts = {}) {
20
- if (!opts.account || !opts.account.uid) {
21
- throw new Error("XiaohongshuAdapter: opts.account.uid required");
22
- }
23
- this.account = opts.account;
89
+ // §A8 v0.2: account.uid optional (snapshot mode pulls from file).
90
+ this.account = opts.account || null;
24
91
  this._dbPath = opts.dbPath || null;
25
- this._dbDriverFactory = opts.dbDriverFactory || null;
26
92
 
27
93
  this.name = NAME;
28
94
  this.version = VERSION;
29
- this.capabilities = ["sync:sqlite", "parse:xhs-history"];
95
+ this.capabilities = [
96
+ "sync:snapshot",
97
+ "sync:sqlite",
98
+ "parse:xhs-note",
99
+ "parse:xhs-liked",
100
+ "parse:xhs-follow",
101
+ "parse:xhs-history",
102
+ ];
30
103
  this.extractMode = "device-pull";
31
104
  this.rateLimits = {};
32
105
  this.dataDisclosure = {
33
- fields: ["xhs:viewed_notes / liked / favourites / search_history"],
106
+ fields: [
107
+ "xhs:notes (own posts, title / desc / type / engagement counts)",
108
+ "xhs:liked (notes the user liked)",
109
+ "xhs:follow (followed users)",
110
+ "xhs:history / search (legacy sqlite mode)",
111
+ ],
34
112
  sensitivity: "medium",
35
113
  legalGate: false,
114
+ defaultInclude: {
115
+ note: true,
116
+ liked: true,
117
+ follow: true,
118
+ },
119
+ };
120
+
121
+ this._deps = {
122
+ fs,
123
+ dbDriverFactory: opts.dbDriverFactory || null,
124
+ };
125
+ }
126
+
127
+ async authenticate(ctx = {}) {
128
+ if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
129
+ try {
130
+ this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
131
+ } catch (err) {
132
+ return {
133
+ ok: false,
134
+ reason: "INPUT_PATH_UNREADABLE",
135
+ message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
136
+ };
137
+ }
138
+ return { ok: true, mode: "snapshot-file" };
139
+ }
140
+ if (this._dbPath || (ctx && typeof ctx.dbPath === "string")) {
141
+ if (!this.account || !this.account.uid) {
142
+ return {
143
+ ok: false,
144
+ reason: "NO_ACCOUNT_UID",
145
+ message: "social-xiaohongshu.authenticate: sqlite mode requires account.uid",
146
+ };
147
+ }
148
+ return { ok: true, account: this.account.uid, mode: "sqlite" };
149
+ }
150
+ return {
151
+ ok: false,
152
+ reason: "NO_INPUT",
153
+ message:
154
+ "social-xiaohongshu.authenticate: needs opts.inputPath (snapshot mode) OR opts.dbPath (sqlite mode)",
36
155
  };
37
156
  }
38
157
 
39
- async authenticate() { return { ok: true, account: this.account.uid }; }
40
- async healthCheck() { return { ok: true, lastChecked: Date.now() }; }
158
+ async healthCheck() {
159
+ return { ok: true, lastChecked: Date.now() };
160
+ }
41
161
 
42
162
  async *sync(opts = {}) {
163
+ if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
164
+ yield* this._syncViaSnapshot(opts);
165
+ return;
166
+ }
43
167
  const dbPath = opts.dbPath || this._dbPath;
44
- if (!dbPath || !fs.existsSync(dbPath)) return;
45
- const Driver = this._dbDriverFactory
46
- ? this._dbDriverFactory()
168
+ if (dbPath) {
169
+ yield* this._syncViaSqlite({ ...opts, dbPath });
170
+ return;
171
+ }
172
+ throw new Error(
173
+ "social-xiaohongshu.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.dbPath (sqlite mode, legacy device-pull)",
174
+ );
175
+ }
176
+
177
+ async *_syncViaSnapshot(opts) {
178
+ const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
179
+ const snapshot = JSON.parse(raw);
180
+ if (
181
+ !snapshot ||
182
+ typeof snapshot !== "object" ||
183
+ snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
184
+ ) {
185
+ throw new Error(
186
+ `social-xiaohongshu.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
187
+ );
188
+ }
189
+ const fallbackCapturedAt =
190
+ Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
191
+ ? Math.floor(snapshot.snapshottedAt)
192
+ : Date.now();
193
+
194
+ const account =
195
+ snapshot.account && typeof snapshot.account === "object"
196
+ ? snapshot.account
197
+ : null;
198
+ const include = opts.include || {};
199
+ const limit =
200
+ Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
201
+
202
+ const events = Array.isArray(snapshot.events) ? snapshot.events : [];
203
+ let emitted = 0;
204
+ for (const ev of events) {
205
+ if (emitted >= limit) return;
206
+ if (!ev || typeof ev !== "object") continue;
207
+ const kind = ev.kind;
208
+ if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
209
+ if (include[kind] === false) continue;
210
+
211
+ const capturedAt =
212
+ parseTime(ev.capturedAt) ||
213
+ parseTime(ev.time) ||
214
+ fallbackCapturedAt;
215
+ const id =
216
+ (typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
217
+ ev.noteId ||
218
+ ev.userId ||
219
+ null;
220
+
221
+ yield {
222
+ adapter: NAME,
223
+ kind,
224
+ originalId: stableOriginalId(kind, id),
225
+ capturedAt,
226
+ payload: { ...ev, account },
227
+ };
228
+ emitted += 1;
229
+ }
230
+ }
231
+
232
+ async *_syncViaSqlite(opts) {
233
+ if (!this.account || !this.account.uid) {
234
+ throw new Error(
235
+ "social-xiaohongshu._syncViaSqlite: account.uid required",
236
+ );
237
+ }
238
+ const dbPath = opts.dbPath;
239
+ if (!dbPath || !this._deps.fs.existsSync(dbPath)) return;
240
+ const Driver = this._deps.dbDriverFactory
241
+ ? this._deps.dbDriverFactory()
47
242
  : require("better-sqlite3-multiple-ciphers");
48
243
  const db = new Driver(dbPath, { readonly: true });
49
244
  try {
50
- const histories = trySelect(db, "SELECT * FROM browse_history ORDER BY view_time DESC LIMIT 5000")
51
- || trySelect(db, "SELECT * FROM note ORDER BY view_time DESC LIMIT 5000") || [];
245
+ const histories =
246
+ trySelect(db, "SELECT * FROM browse_history ORDER BY view_time DESC LIMIT 5000")
247
+ || trySelect(db, "SELECT * FROM note ORDER BY view_time DESC LIMIT 5000")
248
+ || [];
52
249
  for (const row of histories) {
53
- yield { adapter: NAME, originalId: `history-${row.id || row.note_id}`, capturedAt: parseTime(row.view_time), payload: { row, kind: "history" } };
250
+ yield {
251
+ adapter: NAME,
252
+ originalId: `history-${row.id || row.note_id}`,
253
+ capturedAt: parseTime(row.view_time),
254
+ payload: { row, kind: KIND_HISTORY },
255
+ };
54
256
  }
55
257
  const likes = trySelect(db, "SELECT * FROM liked_note ORDER BY like_time DESC LIMIT 5000") || [];
56
258
  for (const row of likes) {
57
- yield { adapter: NAME, originalId: `like-${row.id || row.note_id}`, capturedAt: parseTime(row.like_time), payload: { row, kind: "like" } };
259
+ yield {
260
+ adapter: NAME,
261
+ originalId: `like-${row.id || row.note_id}`,
262
+ capturedAt: parseTime(row.like_time),
263
+ payload: { row, kind: KIND_LIKE },
264
+ };
58
265
  }
59
266
  const favs = trySelect(db, "SELECT * FROM favourite ORDER BY save_time DESC LIMIT 5000") || [];
60
267
  for (const row of favs) {
61
- yield { adapter: NAME, originalId: `fav-${row.id || row.note_id}`, capturedAt: parseTime(row.save_time), payload: { row, kind: "favourite" } };
268
+ yield {
269
+ adapter: NAME,
270
+ originalId: `fav-${row.id || row.note_id}`,
271
+ capturedAt: parseTime(row.save_time),
272
+ payload: { row, kind: KIND_FAVOURITE },
273
+ };
62
274
  }
63
275
  } finally {
64
- try { db.close(); } catch (_e) {}
276
+ try { db.close(); } catch (_e) { /* ignore */ }
65
277
  }
66
278
  }
67
279
 
68
280
  normalize(raw) {
69
- const { kind, row } = raw.payload;
70
- const now = Date.now();
71
- const occurredAt = parseTime(row.view_time || row.like_time || row.save_time) || now;
72
- const source = { adapter: NAME, adapterVersion: VERSION, originalId: raw.originalId, capturedAt: occurredAt, capturedBy: "sqlite" };
73
- const subtypeMap = { history: "browse", like: "like", favourite: "like" };
74
- return {
75
- events: [{
76
- id: newId(), type: "event",
77
- subtype: subtypeMap[kind] || "browse",
78
- occurredAt, actor: "person-self",
79
- content: { title: row.title || row.note_title || "(no title)" },
80
- ingestedAt: now, source,
81
- extra: { noteId: row.note_id || null, author: row.author || row.nickname || null, kind },
82
- }],
83
- persons: [], places: [], items: [], topics: [],
84
- };
281
+ if (!raw || !raw.payload) {
282
+ throw new Error("XiaohongshuAdapter.normalize: payload missing");
283
+ }
284
+ const ingestedAt = Date.now();
285
+ const kind = raw.kind || raw.payload.kind;
286
+ const p = raw.payload;
287
+
288
+ // Sqlite mode (legacy) — payload.row + kind in {history, like, favourite}
289
+ if (kind === KIND_HISTORY || kind === KIND_LIKE || kind === KIND_FAVOURITE) {
290
+ return normalizeSqliteRow(p, raw, ingestedAt);
291
+ }
292
+
293
+ // Snapshot mode
294
+ if (kind === KIND_NOTE) return normalizeNote(p, raw, ingestedAt);
295
+ if (kind === KIND_LIKED) return normalizeLiked(p, raw, ingestedAt);
296
+ if (kind === KIND_FOLLOW) return normalizeFollow(p, raw, ingestedAt);
297
+ throw new Error(`XiaohongshuAdapter.normalize: unknown kind ${kind}`);
85
298
  }
86
299
  }
87
- function trySelect(db, sql) { try { return db.prepare(sql).all(); } catch (_e) { return null; } }
88
- function parseTime(v) {
89
- if (Number.isFinite(v)) return v > 1e12 ? v : v * 1000;
90
- if (typeof v === "string") {
91
- if (/^\d+$/.test(v)) { const n = parseInt(v, 10); return n > 1e12 ? n : n * 1000; }
92
- const t = Date.parse(v); return Number.isFinite(t) ? t : null;
93
- }
94
- return null;
300
+
301
/**
 * Assemble the provenance (`source`) record attached to normalized entities.
 *
 * @param {object} raw - Raw event; supplies `originalId` and, when truthy,
 *   `capturedAt`.
 * @param {number} occurredAt - Used as `capturedAt` when the raw event has
 *   no truthy one.
 * @param {string} [capturedBy] - Capture channel; defaults to
 *   `CAPTURED_BY.SQLITE` when falsy.
 * @returns {object} `{ adapter, adapterVersion, originalId, capturedAt, capturedBy }`.
 */
function buildSource(raw, occurredAt, capturedBy) {
  const source = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: raw.originalId,
  };
  source.capturedAt = raw.capturedAt ? raw.capturedAt : occurredAt;
  source.capturedBy = capturedBy ? capturedBy : CAPTURED_BY.SQLITE;
  return source;
}
310
+
311
/**
 * Normalize one legacy sqlite-mode row (history / like / favourite) into a
 * single event.
 *
 * The event time is the first truthy of view_time / like_time / save_time,
 * then the raw event's capturedAt, then `ingestedAt`. History maps to the
 * BROWSE subtype; like and favourite both map to LIKE; unknown kinds fall
 * back to BROWSE.
 *
 * @param {object} p - Raw payload: `{ row, kind }`.
 * @param {object} raw - Raw event (supplies originalId / capturedAt / kind).
 * @param {number} ingestedAt - Ingestion time in epoch milliseconds.
 * @returns {object} `{ events: [event], persons, places, items, topics }`.
 * @throws {TypeError} When the payload carries no row object.
 */
function normalizeSqliteRow(p, raw, ingestedAt) {
  const row = p && p.row;
  if (!row || typeof row !== "object") {
    throw new TypeError("XiaohongshuAdapter.normalize: payload.row missing");
  }
  // The dispatcher accepts the kind from either the raw event or the
  // payload, so fall back to raw.kind instead of silently treating a
  // like / favourite as a browse.
  const kind = p.kind || raw.kind;

  const occurredAt =
    parseTime(row.view_time || row.like_time || row.save_time) ||
    raw.capturedAt ||
    ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);
  const subtypeMap = {
    [KIND_HISTORY]: EVENT_SUBTYPES.BROWSE,
    [KIND_LIKE]: EVENT_SUBTYPES.LIKE,
    [KIND_FAVOURITE]: EVENT_SUBTYPES.LIKE,
  };
  return {
    events: [{
      id: newId(),
      type: ENTITY_TYPES.EVENT,
      subtype: subtypeMap[kind] || EVENT_SUBTYPES.BROWSE,
      occurredAt,
      actor: "person-self",
      content: { title: row.title || row.note_title || "(no title)" },
      ingestedAt,
      source,
      extra: {
        platform: "xiaohongshu",
        noteId: row.note_id || null,
        author: row.author || row.nickname || null,
        kind,
      },
    }],
    persons: [], places: [], items: [], topics: [],
  };
}
96
- module.exports = { XiaohongshuAdapter, NAME, VERSION };
343
+
344
/**
 * Normalize a snapshot `note` entry (one of the user's own posts) into a
 * single POST event.
 *
 * The event time is the payload's capturedAt, then the raw event's, then
 * `ingestedAt`. Missing title / desc / type / counters default to
 * "(no title)", "", "normal" and 0 respectively.
 *
 * @param {object} p - Snapshot payload for the note.
 * @param {object} raw - Raw event (supplies originalId / capturedAt).
 * @param {number} ingestedAt - Ingestion time in epoch milliseconds.
 * @returns {object} `{ events: [event], persons, places, items, topics }`.
 */
function normalizeNote(p, raw, ingestedAt) {
  const when = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const provenance = buildSource(raw, when, CAPTURED_BY.API);

  const engagement = {
    platform: "xiaohongshu",
    noteId: p.noteId,
    type: p.type || "normal",
    likedCount: p.likedCount || 0,
    collectedCount: p.collectedCount || 0,
    commentCount: p.commentCount || 0,
  };
  const postEvent = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.POST,
    occurredAt: when,
    actor: "person-self",
    content: {
      title: p.title || "(no title)",
      text: p.desc || "",
    },
    ingestedAt,
    source: provenance,
    extra: engagement,
  };

  return {
    events: [postEvent],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
373
+
374
/**
 * Normalize a snapshot `liked` entry (a note the user liked) into a single
 * LIKE event.
 *
 * The event time is the payload's capturedAt, then the raw event's, then
 * `ingestedAt`. A missing title becomes "(no title)" and a missing author
 * nickname becomes null.
 *
 * @param {object} p - Snapshot payload for the liked note.
 * @param {object} raw - Raw event (supplies originalId / capturedAt).
 * @param {number} ingestedAt - Ingestion time in epoch milliseconds.
 * @returns {object} `{ events: [event], persons, places, items, topics }`.
 */
function normalizeLiked(p, raw, ingestedAt) {
  const when = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const provenance = buildSource(raw, when, CAPTURED_BY.API);

  const likeEvent = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.LIKE,
    occurredAt: when,
    actor: "person-self",
    content: { title: p.title || "(no title)" },
    ingestedAt,
    source: provenance,
    extra: {
      platform: "xiaohongshu",
      noteId: p.noteId,
      authorNickname: p.authorNickname || null,
    },
  };

  return {
    events: [likeEvent],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
397
+
398
/**
 * Normalize a snapshot `follow` entry (a user the account follows) into a
 * single contact person. No event is produced.
 *
 * The person id is `person-xiaohongshu-<userId>`. A non-empty string userId
 * is used as-is and a finite numeric userId is stringified, so the same
 * followed user always maps to the same person id. Only when no usable
 * userId exists is a generated `unknown-<newId>` placeholder used.
 *
 * @param {object} p - Snapshot payload: userId, nickname, image, capturedAt.
 * @param {object} raw - Raw event (supplies originalId / capturedAt).
 * @param {number} ingestedAt - Ingestion time in epoch milliseconds.
 * @returns {object} `{ events: [], persons: [person], places, items, topics }`.
 */
function normalizeFollow(p, raw, ingestedAt) {
  let followUid = null;
  if (typeof p.userId === "string" && p.userId.length > 0) {
    followUid = p.userId;
  } else if (typeof p.userId === "number" && Number.isFinite(p.userId)) {
    // Numeric ids are valid identifiers too; treating them as missing would
    // mint a new random person on every normalize call.
    followUid = String(p.userId);
  }
  if (followUid === null) {
    followUid = `unknown-${newId()}`;
  }

  const nickname = p.nickname || "(unnamed)";
  const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.API);
  const person = {
    id: `person-xiaohongshu-${followUid}`,
    type: ENTITY_TYPES.PERSON,
    subtype: PERSON_SUBTYPES.CONTACT,
    names: [nickname],
    ingestedAt,
    source,
    identifiers: {
      "xiaohongshu-uid": [String(followUid)],
    },
    extra: {
      platform: "xiaohongshu",
      image: p.image || null,
      followedAt: occurredAt,
    },
  };
  return {
    events: [],
    persons: [person],
    places: [],
    items: [],
    topics: [],
  };
}
429
+
430
// Public surface of the module: the adapter class, its identity constants,
// and the snapshot contract (schema version + accepted event kinds).
const publicApi = {
  XiaohongshuAdapter,
  NAME,
  VERSION,
  SNAPSHOT_SCHEMA_VERSION,
  VALID_SNAPSHOT_KINDS,
};

module.exports = publicApi;