@chainlesschain/personal-data-hub 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +58 -16
  2. package/__tests__/analysis.test.js +1 -1
  3. package/__tests__/longtail-adapters.test.js +67 -16
  4. package/__tests__/messaging-qq-snapshot.test.js +294 -0
  5. package/__tests__/shopping-pinduoduo-snapshot.test.js +302 -0
  6. package/__tests__/shopping-snapshot.test.js +438 -0
  7. package/__tests__/social-adapters.test.js +28 -3
  8. package/__tests__/social-douyin-snapshot.test.js +253 -0
  9. package/__tests__/social-kuaishou-snapshot.test.js +309 -0
  10. package/__tests__/social-toutiao-snapshot.test.js +314 -0
  11. package/__tests__/social-weibo-snapshot.test.js +234 -0
  12. package/__tests__/social-xiaohongshu-snapshot.test.js +232 -0
  13. package/__tests__/travel-maps-snapshot.test.js +426 -0
  14. package/__tests__/vault-driver-error.test.js +74 -0
  15. package/lib/adapters/messaging-qq/index.js +498 -92
  16. package/lib/adapters/shopping-jd/index.js +228 -25
  17. package/lib/adapters/shopping-meituan/index.js +222 -26
  18. package/lib/adapters/shopping-pinduoduo/index.js +275 -0
  19. package/lib/adapters/social-douyin/index.js +454 -63
  20. package/lib/adapters/social-kuaishou/index.js +379 -127
  21. package/lib/adapters/social-toutiao/index.js +400 -130
  22. package/lib/adapters/social-weibo/index.js +393 -95
  23. package/lib/adapters/social-xiaohongshu/index.js +389 -49
  24. package/lib/adapters/travel-baidu-map/index.js +286 -26
  25. package/lib/adapters/travel-tencent-map/index.js +414 -0
  26. package/lib/index.js +5 -1
  27. package/lib/vault.js +60 -8
  28. package/package.json +2 -1
@@ -1,44 +1,106 @@
1
1
  /**
2
- * Phase 13.9(+) — Kuaishou 快手 adapter (v0.1 scaffold).
2
+ * §A8 v0.2 — Kuaishou (快手) adapter, dual-mode (snapshot + sqlite).
3
3
  *
4
- * Source: 快手 Android app stores user history in SQLite under
5
- * /data/data/com.smile.gifmaker/databases/. Schema is reverse-engineered
6
- * from sjqz parsers and pinned at scaffold quality only — Phase 13.10 will
7
- * fixture-pin real field names after Xiaomi 24115RA8EC E2E.
4
+ * Mirror of social-toutiao v0.2 two-mode pattern:
8
5
  *
9
- * Conjectured tables (pending fixture pin):
10
- * - photo_history watched short-videos (kuaishou calls them "photos")
11
- * - user_collect collected (saved) videos
12
- * - search_record user search queries
6
+ * 1. snapshot mode (opts.inputPath): in-APK Android cc reads a snapshot
7
+ * JSON produced by the phone's KuaishouLocalCollector (root-required
8
+ * SQLCipher decrypt of /data/data/com.smile.gifmaker/databases/ on
9
+ * Phase 13.10 real-device E2E). Desktop-independent; account.uid
10
+ * OPTIONAL at construction — payload carries it.
13
11
  *
14
- * Each row maps to an Event with subtype "browse" (photo_history) /
15
- * "like" (user_collect) / "post" (search_record reframed as a self-authored
16
- * search event).
12
+ * 2. sqlite mode (opts.dbPath, legacy): Phase 13.9 device-pull path, where the
13
+ * desktop reads the pulled DB directly. account.uid REQUIRED in this
14
+ * mode (lazy-checked at sync time).
17
15
  *
18
- * Mirrors social-bilibili adapter contract; sensitivity stays "medium"
19
- * (short-video watch history mainly reveals entertainment preference).
16
+ * Snapshot schema (mirrors KuaishouLocalCollector.SNAPSHOT_SCHEMA_VERSION):
17
+ *
18
+ * {
19
+ * "schemaVersion": 1,
20
+ * "snapshottedAt": <epoch-ms>,
21
+ * "account": { "uid": "12345", "displayName": "alice" },
22
+ * "events": [
23
+ * { "kind": "watch", "id": "photo-<photoId>", "capturedAt": <ms>,
24
+ * "photoId": "...", "caption": "...", "duration": N,
25
+ * "authorId": "...", "authorName": "..." },
26
+ * { "kind": "collect", "id": "collect-<photoId>", "capturedAt": <ms>,
27
+ * "photoId": "...", "caption": "...",
28
+ * "authorId": "...", "authorName": "..." },
29
+ * { "kind": "search", "id": "search-<kw>:<ts>", "capturedAt": <ms>,
30
+ * "keyword": "...", "searchAt": <ms> }
31
+ * ]
32
+ * }
33
+ *
34
+ * Sensitivity: "medium" — short-video watch history mainly reveals
35
+ * entertainment preference (vs Toutiao's news-reading which goes "high").
20
36
  */
21
37
 
22
38
  "use strict";
23
39
 
24
40
  const fs = require("node:fs");
25
41
  const { newId } = require("../../ids");
42
+ const {
43
+ ENTITY_TYPES,
44
+ EVENT_SUBTYPES,
45
+ CAPTURED_BY,
46
+ } = require("../../constants");
26
47
 
27
48
  const NAME = "social-kuaishou";
28
- const VERSION = "0.1.0";
49
+ const VERSION = "0.2.0";
50
+ const SNAPSHOT_SCHEMA_VERSION = 1;
51
+
52
+ const KIND_WATCH = "watch";
53
+ const KIND_COLLECT = "collect";
54
+ const KIND_SEARCH = "search";
55
+ const VALID_SNAPSHOT_KINDS = Object.freeze([
56
+ KIND_WATCH,
57
+ KIND_COLLECT,
58
+ KIND_SEARCH,
59
+ ]);
60
+
61
+ function stableOriginalId(kind, id) {
62
+ const stringified =
63
+ (typeof id === "string" && id.length > 0 && id) ||
64
+ (typeof id === "number" && Number.isFinite(id) && String(id)) ||
65
+ null;
66
+ const safe =
67
+ stringified ||
68
+ `unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
69
+ return `kuaishou:${kind}:${safe}`;
70
+ }
71
+
72
+ function parseTime(v) {
73
+ if (Number.isFinite(v)) return v > 1e12 ? v : v * 1000;
74
+ if (typeof v === "string") {
75
+ if (/^\d+$/.test(v)) {
76
+ const n = parseInt(v, 10);
77
+ return n > 1e12 ? n : n * 1000;
78
+ }
79
+ const t = Date.parse(v);
80
+ return Number.isFinite(t) ? t : null;
81
+ }
82
+ return null;
83
+ }
84
+
85
+ function trySelect(db, sql) {
86
+ try {
87
+ return db.prepare(sql).all();
88
+ } catch (_e) {
89
+ return null;
90
+ }
91
+ }
29
92
 
30
93
  class KuaishouAdapter {
31
94
  constructor(opts = {}) {
32
- if (!opts.account || !opts.account.uid) {
33
- throw new Error("KuaishouAdapter: opts.account.uid required");
34
- }
35
- this.account = opts.account;
95
+ // §A8 v0.2: account.uid OPTIONAL at construction — snapshot mode is
96
+ // stateless. Sqlite mode lazy-checks at sync time.
97
+ this.account = opts.account || null;
36
98
  this._dbPath = opts.dbPath || null;
37
- this._dbDriverFactory = opts.dbDriverFactory || null;
38
99
 
39
100
  this.name = NAME;
40
101
  this.version = VERSION;
41
102
  this.capabilities = [
103
+ "sync:snapshot",
42
104
  "sync:sqlite",
43
105
  "parse:kuaishou-photo-history",
44
106
  "parse:kuaishou-user-collect",
@@ -54,11 +116,49 @@ class KuaishouAdapter {
54
116
  ],
55
117
  sensitivity: "medium",
56
118
  legalGate: false,
119
+ defaultInclude: {
120
+ watch: true,
121
+ collect: true,
122
+ search: true,
123
+ },
124
+ };
125
+
126
+ this._deps = {
127
+ fs,
128
+ dbDriverFactory: opts.dbDriverFactory || null,
57
129
  };
58
130
  }
59
131
 
60
- async authenticate() {
61
- return { ok: true, account: this.account.uid };
132
+ async authenticate(ctx = {}) {
133
+ if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
134
+ try {
135
+ this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
136
+ } catch (err) {
137
+ return {
138
+ ok: false,
139
+ reason: "INPUT_PATH_UNREADABLE",
140
+ message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
141
+ };
142
+ }
143
+ return { ok: true, mode: "snapshot-file" };
144
+ }
145
+ if (this._dbPath || (ctx && typeof ctx.dbPath === "string")) {
146
+ if (!this.account || !this.account.uid) {
147
+ return {
148
+ ok: false,
149
+ reason: "NO_ACCOUNT_UID",
150
+ message:
151
+ "social-kuaishou.authenticate: sqlite mode requires account.uid",
152
+ };
153
+ }
154
+ return { ok: true, account: this.account.uid, mode: "sqlite" };
155
+ }
156
+ return {
157
+ ok: false,
158
+ reason: "NO_INPUT",
159
+ message:
160
+ "social-kuaishou.authenticate: needs opts.inputPath (snapshot mode) OR opts.dbPath (sqlite mode)",
161
+ };
62
162
  }
63
163
 
64
164
  async healthCheck() {
@@ -66,10 +166,85 @@ class KuaishouAdapter {
66
166
  }
67
167
 
68
168
  async *sync(opts = {}) {
169
+ if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
170
+ yield* this._syncViaSnapshot(opts);
171
+ return;
172
+ }
69
173
  const dbPath = opts.dbPath || this._dbPath;
70
- if (!dbPath || !fs.existsSync(dbPath)) return;
71
- const Driver = this._dbDriverFactory
72
- ? this._dbDriverFactory()
174
+ if (dbPath) {
175
+ yield* this._syncViaSqlite({ ...opts, dbPath });
176
+ return;
177
+ }
178
+ throw new Error(
179
+ "social-kuaishou.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.dbPath (sqlite mode, legacy device-pull)",
180
+ );
181
+ }
182
+
183
+ async *_syncViaSnapshot(opts) {
184
+ const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
185
+ const snapshot = JSON.parse(raw);
186
+ if (
187
+ !snapshot ||
188
+ typeof snapshot !== "object" ||
189
+ snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
190
+ ) {
191
+ throw new Error(
192
+ `social-kuaishou.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
193
+ );
194
+ }
195
+ const fallbackCapturedAt =
196
+ Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
197
+ ? Math.floor(snapshot.snapshottedAt)
198
+ : Date.now();
199
+
200
+ const account =
201
+ snapshot.account && typeof snapshot.account === "object"
202
+ ? snapshot.account
203
+ : null;
204
+ const include = opts.include || {};
205
+ const limit =
206
+ Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
207
+
208
+ const events = Array.isArray(snapshot.events) ? snapshot.events : [];
209
+ let emitted = 0;
210
+ for (const ev of events) {
211
+ if (emitted >= limit) return;
212
+ if (!ev || typeof ev !== "object") continue;
213
+ const kind = ev.kind;
214
+ if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
215
+ if (include[kind] === false) continue;
216
+
217
+ const capturedAt =
218
+ parseTime(ev.capturedAt) ||
219
+ parseTime(ev.time) ||
220
+ fallbackCapturedAt;
221
+ const id =
222
+ (typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
223
+ ev.photoId ||
224
+ ev.keyword ||
225
+ null;
226
+
227
+ yield {
228
+ adapter: NAME,
229
+ kind,
230
+ originalId: stableOriginalId(kind, id),
231
+ capturedAt,
232
+ payload: { ...ev, account },
233
+ };
234
+ emitted += 1;
235
+ }
236
+ }
237
+
238
+ async *_syncViaSqlite(opts) {
239
+ if (!this.account || !this.account.uid) {
240
+ throw new Error(
241
+ "social-kuaishou._syncViaSqlite: account.uid required (set via new KuaishouAdapter({ account: { uid } }) in cli wiring)",
242
+ );
243
+ }
244
+ const dbPath = opts.dbPath;
245
+ if (!dbPath || !this._deps.fs.existsSync(dbPath)) return;
246
+ const Driver = this._deps.dbDriverFactory
247
+ ? this._deps.dbDriverFactory()
73
248
  : require("better-sqlite3-multiple-ciphers");
74
249
  const db = new Driver(dbPath, { readonly: true });
75
250
 
@@ -84,7 +259,7 @@ class KuaishouAdapter {
84
259
  adapter: NAME,
85
260
  originalId: `photo-${row.id || row._id || row.photo_id}`,
86
261
  capturedAt: parseTime(row.view_time || row.time || row.create_time),
87
- payload: { row, kind: "watch" },
262
+ payload: { row, kind: KIND_WATCH },
88
263
  };
89
264
  }
90
265
 
@@ -98,7 +273,7 @@ class KuaishouAdapter {
98
273
  adapter: NAME,
99
274
  originalId: `collect-${row.id || row.photo_id}`,
100
275
  capturedAt: parseTime(row.collect_time || row.time),
101
- payload: { row, kind: "collect" },
276
+ payload: { row, kind: KIND_COLLECT },
102
277
  };
103
278
  }
104
279
 
@@ -112,126 +287,203 @@ class KuaishouAdapter {
112
287
  adapter: NAME,
113
288
  originalId: `search-${row.id || row.keyword + ":" + row.search_time}`,
114
289
  capturedAt: parseTime(row.search_time || row.time),
115
- payload: { row, kind: "search" },
290
+ payload: { row, kind: KIND_SEARCH },
116
291
  };
117
292
  }
118
293
  } finally {
119
294
  try {
120
295
  db.close();
121
- } catch (_e) {}
296
+ } catch (_e) {
297
+ /* ignore */
298
+ }
122
299
  }
123
300
  }
124
301
 
125
302
  normalize(raw) {
126
- if (!raw || !raw.payload || !raw.payload.row) {
127
- throw new Error("KuaishouAdapter.normalize: row missing");
303
+ if (!raw || !raw.payload) {
304
+ throw new Error("KuaishouAdapter.normalize: payload missing");
128
305
  }
129
- const { kind, row } = raw.payload;
130
- const now = Date.now();
131
- const occurredAt =
132
- parseTime(row.view_time || row.collect_time || row.search_time || row.time) ||
133
- now;
134
- const source = {
135
- adapter: NAME,
136
- adapterVersion: VERSION,
137
- originalId: raw.originalId,
138
- capturedAt: occurredAt,
139
- capturedBy: "sqlite",
140
- };
306
+ const ingestedAt = Date.now();
307
+ const kind = raw.kind || raw.payload.kind;
308
+ const p = raw.payload;
141
309
 
142
- if (kind === "collect") {
143
- return {
144
- events: [
145
- {
146
- id: newId(),
147
- type: "event",
148
- subtype: "like",
149
- occurredAt,
150
- actor: "person-self",
151
- content: { title: row.caption || row.title || "(no caption)" },
152
- ingestedAt: now,
153
- source,
154
- extra: {
155
- photoId: row.photo_id || null,
156
- authorId: row.author_id || null,
157
- authorName: row.author_name || null,
158
- },
159
- },
160
- ],
161
- persons: [],
162
- places: [],
163
- items: [],
164
- topics: [],
165
- };
310
+ if (kind === KIND_COLLECT) {
311
+ return normalizeCollect(p, raw, ingestedAt);
166
312
  }
167
- if (kind === "search") {
168
- return {
169
- events: [
170
- {
171
- id: newId(),
172
- type: "event",
173
- subtype: "post",
174
- occurredAt,
175
- actor: "person-self",
176
- content: { title: row.keyword || row.query || "(empty query)" },
177
- ingestedAt: now,
178
- source,
179
- extra: { kind: "search", keyword: row.keyword || row.query || null },
180
- },
181
- ],
182
- persons: [],
183
- places: [],
184
- items: [],
185
- topics: [],
186
- };
313
+ if (kind === KIND_SEARCH) {
314
+ return normalizeSearch(p, raw, ingestedAt);
187
315
  }
188
- // watch maps to a browse event
189
- return {
190
- events: [
191
- {
192
- id: newId(),
193
- type: "event",
194
- subtype: "browse",
195
- occurredAt,
196
- actor: "person-self",
197
- content: { title: row.caption || row.title || "(no caption)" },
198
- ingestedAt: now,
199
- source,
200
- extra: {
201
- photoId: row.photo_id || null,
202
- duration: row.duration || row.play_duration || null,
203
- authorId: row.author_id || null,
204
- authorName: row.author_name || null,
205
- },
206
- },
207
- ],
208
- persons: [],
209
- places: [],
210
- items: [],
211
- topics: [],
212
- };
316
+ if (kind === KIND_WATCH) {
317
+ return normalizeWatch(p, raw, ingestedAt);
318
+ }
319
+ throw new Error(`KuaishouAdapter.normalize: unknown kind ${kind}`);
213
320
  }
214
321
  }
215
322
 
216
- function trySelect(db, sql) {
217
- try {
218
- return db.prepare(sql).all();
219
- } catch (_e) {
220
- return null;
323
+ function buildSource(raw, occurredAt, capturedBy) {
324
+ return {
325
+ adapter: NAME,
326
+ adapterVersion: VERSION,
327
+ originalId: raw.originalId,
328
+ capturedAt: raw.capturedAt || occurredAt,
329
+ capturedBy,
330
+ };
331
+ }
332
+
333
+ function pickField(p, snapshotKey, ...sqlKeys) {
334
+ if (p[snapshotKey] != null) return p[snapshotKey];
335
+ const row = p.row || {};
336
+ for (const k of sqlKeys) {
337
+ if (row[k] != null) return row[k];
338
+ }
339
+ return null;
340
+ }
341
+
342
+ function normalizeWatch(p, raw, ingestedAt) {
343
+ if (!p.row && !p.photoId && !p.caption) {
344
+ if (!p.row) {
345
+ throw new Error("KuaishouAdapter.normalize: row missing");
346
+ }
221
347
  }
348
+ const isSnapshot = !p.row;
349
+ const row = p.row || {};
350
+ const caption =
351
+ pickField(p, "caption", "caption", "title") || "(no caption)";
352
+ const occurredAt =
353
+ parseTime(p.capturedAt) ||
354
+ parseTime(row.view_time || row.time || row.create_time) ||
355
+ raw.capturedAt ||
356
+ ingestedAt;
357
+ const source = buildSource(
358
+ raw,
359
+ occurredAt,
360
+ isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
361
+ );
362
+ return {
363
+ events: [
364
+ {
365
+ id: newId(),
366
+ type: ENTITY_TYPES.EVENT,
367
+ subtype: EVENT_SUBTYPES.BROWSE,
368
+ occurredAt,
369
+ actor: "person-self",
370
+ content: { title: caption },
371
+ ingestedAt,
372
+ source,
373
+ extra: {
374
+ platform: "kuaishou",
375
+ photoId: pickField(p, "photoId", "photo_id"),
376
+ duration: pickField(p, "duration", "duration", "play_duration"),
377
+ authorId: pickField(p, "authorId", "author_id"),
378
+ authorName: pickField(p, "authorName", "author_name"),
379
+ },
380
+ },
381
+ ],
382
+ persons: [],
383
+ places: [],
384
+ items: [],
385
+ topics: [],
386
+ };
222
387
  }
223
388
 
224
- function parseTime(v) {
225
- if (Number.isFinite(v)) return v > 1e12 ? v : v * 1000;
226
- if (typeof v === "string") {
227
- if (/^\d+$/.test(v)) {
228
- const n = parseInt(v, 10);
229
- return n > 1e12 ? n : n * 1000;
389
+ function normalizeCollect(p, raw, ingestedAt) {
390
+ if (!p.row && !p.photoId && !p.caption) {
391
+ if (!p.row) {
392
+ throw new Error("KuaishouAdapter.normalize: row missing");
230
393
  }
231
- const t = Date.parse(v);
232
- return Number.isFinite(t) ? t : null;
233
394
  }
234
- return null;
395
+ const isSnapshot = !p.row;
396
+ const row = p.row || {};
397
+ const caption =
398
+ pickField(p, "caption", "caption", "title") || "(no caption)";
399
+ const occurredAt =
400
+ parseTime(p.capturedAt) ||
401
+ parseTime(row.collect_time || row.time) ||
402
+ raw.capturedAt ||
403
+ ingestedAt;
404
+ const source = buildSource(
405
+ raw,
406
+ occurredAt,
407
+ isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
408
+ );
409
+ return {
410
+ events: [
411
+ {
412
+ id: newId(),
413
+ type: ENTITY_TYPES.EVENT,
414
+ subtype: EVENT_SUBTYPES.LIKE,
415
+ occurredAt,
416
+ actor: "person-self",
417
+ content: { title: caption },
418
+ ingestedAt,
419
+ source,
420
+ extra: {
421
+ platform: "kuaishou",
422
+ photoId: pickField(p, "photoId", "photo_id"),
423
+ authorId: pickField(p, "authorId", "author_id"),
424
+ authorName: pickField(p, "authorName", "author_name"),
425
+ },
426
+ },
427
+ ],
428
+ persons: [],
429
+ places: [],
430
+ items: [],
431
+ topics: [],
432
+ };
433
+ }
434
+
435
+ function normalizeSearch(p, raw, ingestedAt) {
436
+ if (!p.row && !p.keyword && !p.query) {
437
+ if (!p.row) {
438
+ throw new Error("KuaishouAdapter.normalize: row missing");
439
+ }
440
+ }
441
+ const isSnapshot = !p.row;
442
+ const row = p.row || {};
443
+ const keyword =
444
+ pickField(p, "keyword", "keyword", "query") ||
445
+ pickField(p, "query") ||
446
+ "(empty query)";
447
+ const occurredAt =
448
+ parseTime(p.capturedAt) ||
449
+ parseTime(p.searchAt) ||
450
+ parseTime(row.search_time || row.time) ||
451
+ raw.capturedAt ||
452
+ ingestedAt;
453
+ const source = buildSource(
454
+ raw,
455
+ occurredAt,
456
+ isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
457
+ );
458
+ return {
459
+ events: [
460
+ {
461
+ id: newId(),
462
+ type: ENTITY_TYPES.EVENT,
463
+ subtype: EVENT_SUBTYPES.POST,
464
+ occurredAt,
465
+ actor: "person-self",
466
+ content: { title: keyword },
467
+ ingestedAt,
468
+ source,
469
+ extra: {
470
+ platform: "kuaishou",
471
+ kind: "search",
472
+ keyword,
473
+ },
474
+ },
475
+ ],
476
+ persons: [],
477
+ places: [],
478
+ items: [],
479
+ topics: [],
480
+ };
235
481
  }
236
482
 
237
- module.exports = { KuaishouAdapter, NAME, VERSION };
483
+ module.exports = {
484
+ KuaishouAdapter,
485
+ NAME,
486
+ VERSION,
487
+ SNAPSHOT_SCHEMA_VERSION,
488
+ VALID_SNAPSHOT_KINDS,
489
+ };