@chainlesschain/personal-data-hub 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,48 +1,161 @@
1
1
  /**
2
- * Phase 13.2 — Weibo (微博) adapter.
2
+ * §A8 v0.2 — Weibo (微博) adapter, dual-mode (snapshot + sqlite).
3
3
  *
4
- * Source: Weibo Android app SQLite DBs (per sjqz/parsers/social.py
5
- * WeiboParser). Three tables of v0 interest:
6
- * - post / status posts the user published
7
- * - search_history queries
8
- * - message / direct private messages
4
+ * Mirror of social-bilibili/adapter.js two-mode pattern:
5
+ *
6
+ * 1. snapshot mode (opts.inputPath): in-APK Android cc reads a snapshot
7
+ * JSON produced by the phone's WeiboLocalCollector (WebView + OkHttp).
8
+ * Desktop-independent path. Adapter is stateless when in snapshot mode
9
+ * — account.uid is OPTIONAL at construction (the snapshot file carries
10
+ * account in payload).
11
+ *
12
+ * 2. sqlite mode (opts.dbPath, legacy): Phase 13.2 device-pull path —
13
+ * reads Weibo Android app's SQLite (history / post / status / search_
14
+ * history). Preserved for backward compat with desktop sqlite-mode
15
+ * users; account.uid REQUIRED in this mode.
16
+ *
17
+ * Snapshot schema (mirrors WeiboLocalCollector.SNAPSHOT_SCHEMA_VERSION):
18
+ *
19
+ * {
20
+ * "schemaVersion": 1,
21
+ * "snapshottedAt": <epoch-ms>,
22
+ * "account": { "uid": "12345", "displayName": "alice" },
23
+ * "events": [
24
+ * { "kind": "post", "id": "post-<mid>", "capturedAt": <ms>,
25
+ * "text": "...", "mid": "...", "source": "...",
26
+ * "repostsCount": N, "commentsCount": N, "likesCount": N, "picCount": N },
27
+ * { "kind": "favourite", "id": "fav-<mid>", "capturedAt": <ms>,
28
+ * "text": "...", "mid": "...", "authorScreenName": "..." },
29
+ * { "kind": "follow", "id": "follow-<uid>", "capturedAt": <ms>,
30
+ * "uid": <num>, "screenName": "...", "description": "...", "avatarUrl": "..." }
31
+ * ]
32
+ * }
9
33
  */
10
34
 
11
35
  "use strict";
12
36
 
13
37
  const fs = require("node:fs");
14
38
  const { newId } = require("../../ids");
39
+ const {
40
+ ENTITY_TYPES,
41
+ PERSON_SUBTYPES,
42
+ EVENT_SUBTYPES,
43
+ CAPTURED_BY,
44
+ } = require("../../constants");
15
45
 
16
46
  const NAME = "social-weibo";
17
/** Adapter version reported as `source.adapterVersion` on normalized entities. */
const VERSION = "0.6.0";
/** Snapshot files must carry exactly this `schemaVersion` to be accepted by sync. */
const SNAPSHOT_SCHEMA_VERSION = 1;

// Record kinds dispatched on by normalize().
const KIND_POST = "post";
const KIND_FAVOURITE = "favourite";
const KIND_FOLLOW = "follow";
const KIND_SEARCH = "search"; // legacy sqlite-mode only
/** Kinds emitted from a snapshot file; events of any other kind are skipped. */
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_POST, KIND_FAVOURITE, KIND_FOLLOW]);
55
+
56
/**
 * Build the `originalId` for a snapshot-mode record: `weibo:<kind>:<id>`.
 *
 * @param {string} kind - record kind, embedded verbatim in the result.
 * @param {string|number|null|undefined} id - source id; a non-empty string or
 *   a finite number is used as-is (numbers are stringified).
 * @returns {string} the namespaced id. When `id` is unusable the suffix is
 *   `unknown-<epoch-ms>-<random>`, which is NOT reproducible across calls.
 */
function stableOriginalId(kind, id) {
  let usable = null;
  if (typeof id === "string" && id.length > 0) {
    usable = id;
  } else if (typeof id === "number" && Number.isFinite(id)) {
    usable = String(id);
  }
  // No usable id: fall back to a time + random token so records do not collide.
  const suffix =
    usable || `unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  return `weibo:${kind}:${suffix}`;
}
66
+
67
/**
 * Convert a timestamp-like value to epoch milliseconds.
 *
 * Numeric input (a finite number or an all-digit string) greater than 1e12 is
 * taken as milliseconds already; anything at or below that is multiplied by
 * 1000 (i.e. treated as seconds). Other strings go through `Date.parse`.
 *
 * @param {*} v - number, numeric string, or date string.
 * @returns {number|null} epoch milliseconds, or null when `v` is not a finite
 *   number, not a string, or a string `Date.parse` cannot read.
 */
function parseTime(v) {
  const toMillis = (n) => (n > 1e12 ? n : n * 1000);
  if (Number.isFinite(v)) return toMillis(v);
  if (typeof v !== "string") return null;

  // Trim first so a padded digit string is still read as an epoch value
  // instead of being handed to Date.parse.
  const s = v.trim();
  if (/^\d+$/.test(s)) return toMillis(Number.parseInt(s, 10));
  const parsed = Date.parse(s);
  return Number.isFinite(parsed) ? parsed : null;
}
79
+
80
/**
 * Run a query and return all rows, or null if preparing/executing it throws.
 *
 * Deliberately best-effort: callers chain several candidate queries with `||`
 * and treat null as "try the next one", so the error itself is discarded.
 *
 * @param {{prepare: Function}} db - handle exposing `prepare(sql).all()`.
 * @param {string} sql - statement to run.
 * @returns {Array|null} rows from `.all()`, or null on any thrown error.
 */
function trySelect(db, sql) {
  try {
    return db.prepare(sql).all();
  } catch (_e) {
    return null;
  }
}
18
83
 
19
84
  class WeiboAdapter {
20
85
  constructor(opts = {}) {
21
- if (!opts.account || !opts.account.uid) {
22
- throw new Error("WeiboAdapter: opts.account.uid required");
23
- }
24
- this.account = opts.account;
86
+ // §A8 v0.2: account.uid now OPTIONAL at construction — snapshot mode is
87
+ // stateless and pulls account from the snapshot file. Sqlite mode (legacy
88
+ // device-pull) still requires it; checked at sync time, not construction.
89
+ this.account = opts.account || null;
25
90
  this._dbPath = opts.dbPath || null;
26
- this._dbDriverFactory = opts.dbDriverFactory || null;
27
91
 
28
92
  this.name = NAME;
29
93
  this.version = VERSION;
30
- this.capabilities = ["sync:sqlite", "parse:weibo-posts", "parse:weibo-search"];
94
+ this.capabilities = [
95
+ "sync:snapshot",
96
+ "sync:sqlite",
97
+ "parse:weibo-posts",
98
+ "parse:weibo-favourite",
99
+ "parse:weibo-follow",
100
+ "parse:weibo-search",
101
+ ];
102
+ // Existing desktop wiring may key off this — kept as device-pull (the
103
+ // sqlite mode is the desktop-side; snapshot mode is in-APK Android).
31
104
  this.extractMode = "device-pull";
32
105
  this.rateLimits = {};
33
106
  this.dataDisclosure = {
34
107
  fields: [
35
- "weibo:posts (text / created_at / reposts_count / comments_count)",
36
- "weibo:search_history",
37
- "weibo:messages",
108
+ "weibo:posts (text / created_at / reposts_count / comments_count / likes)",
109
+ "weibo:favourite (mid / text / author)",
110
+ "weibo:follow (uid / screen_name)",
111
+ "weibo:search_history (legacy sqlite mode)",
38
112
  ],
39
113
  sensitivity: "medium",
40
114
  legalGate: false,
115
+ defaultInclude: {
116
+ post: true,
117
+ favourite: true,
118
+ follow: true,
119
+ },
120
+ };
121
+
122
+ // _deps injection seam for tests (vi.mock fs/ doesn't intercept require in
123
+ // inlined CJS — see .claude/rules/testing.md).
124
+ this._deps = {
125
+ fs,
126
+ dbDriverFactory: opts.dbDriverFactory || null,
41
127
  };
42
128
  }
43
129
 
44
- async authenticate() {
45
- return { ok: true, account: this.account.uid };
130
+ async authenticate(ctx = {}) {
131
+ if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
132
+ try {
133
+ this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
134
+ } catch (err) {
135
+ return {
136
+ ok: false,
137
+ reason: "INPUT_PATH_UNREADABLE",
138
+ message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
139
+ };
140
+ }
141
+ return { ok: true, mode: "snapshot-file" };
142
+ }
143
+ if (this._dbPath || (ctx && typeof ctx.dbPath === "string")) {
144
+ if (!this.account || !this.account.uid) {
145
+ return {
146
+ ok: false,
147
+ reason: "NO_ACCOUNT_UID",
148
+ message: "social-weibo.authenticate: sqlite mode requires account.uid",
149
+ };
150
+ }
151
+ return { ok: true, account: this.account.uid, mode: "sqlite" };
152
+ }
153
+ return {
154
+ ok: false,
155
+ reason: "NO_INPUT",
156
+ message:
157
+ "social-weibo.authenticate: needs opts.inputPath (snapshot mode) OR opts.dbPath (sqlite mode)",
158
+ };
46
159
  }
47
160
 
48
161
  async healthCheck() {
@@ -50,115 +163,300 @@ class WeiboAdapter {
50
163
  }
51
164
 
52
165
  async *sync(opts = {}) {
166
+ if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
167
+ yield* this._syncViaSnapshot(opts);
168
+ return;
169
+ }
53
170
  const dbPath = opts.dbPath || this._dbPath;
54
- if (!dbPath || !fs.existsSync(dbPath)) return;
55
- const Driver = this._dbDriverFactory
56
- ? this._dbDriverFactory()
171
+ if (dbPath) {
172
+ yield* this._syncViaSqlite({ ...opts, dbPath });
173
+ return;
174
+ }
175
+ throw new Error(
176
+ "social-weibo.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.dbPath (sqlite mode, legacy device-pull)",
177
+ );
178
+ }
179
+
180
+ async *_syncViaSnapshot(opts) {
181
+ const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
182
+ const snapshot = JSON.parse(raw);
183
+ if (
184
+ !snapshot ||
185
+ typeof snapshot !== "object" ||
186
+ snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
187
+ ) {
188
+ throw new Error(
189
+ `social-weibo.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
190
+ );
191
+ }
192
+ const fallbackCapturedAt =
193
+ Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
194
+ ? Math.floor(snapshot.snapshottedAt)
195
+ : Date.now();
196
+
197
+ const account =
198
+ snapshot.account && typeof snapshot.account === "object"
199
+ ? snapshot.account
200
+ : null;
201
+ const include = opts.include || {};
202
+ const limit =
203
+ Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
204
+
205
+ const events = Array.isArray(snapshot.events) ? snapshot.events : [];
206
+ let emitted = 0;
207
+ for (const ev of events) {
208
+ if (emitted >= limit) return;
209
+ if (!ev || typeof ev !== "object") continue;
210
+ const kind = ev.kind;
211
+ if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
212
+ if (include[kind] === false) continue;
213
+
214
+ const capturedAt =
215
+ parseTime(ev.capturedAt) ||
216
+ parseTime(ev.time) ||
217
+ fallbackCapturedAt;
218
+ const id =
219
+ (typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
220
+ ev.mid ||
221
+ ev.uid ||
222
+ null;
223
+
224
+ yield {
225
+ adapter: NAME,
226
+ kind,
227
+ originalId: stableOriginalId(kind, id),
228
+ capturedAt,
229
+ payload: { ...ev, account },
230
+ };
231
+ emitted += 1;
232
+ }
233
+ }
234
+
235
+ async *_syncViaSqlite(opts) {
236
+ // Legacy Phase 13.2 path — requires account.uid in constructor and a DB
237
+ // pulled via the desktop AndroidExtractor.
238
+ if (!this.account || !this.account.uid) {
239
+ throw new Error(
240
+ "social-weibo._syncViaSqlite: account.uid required (set via new WeiboAdapter({ account: { uid } }) in cli wiring)",
241
+ );
242
+ }
243
+ const dbPath = opts.dbPath;
244
+ if (!dbPath || !this._deps.fs.existsSync(dbPath)) return;
245
+ const Driver = this._deps.dbDriverFactory
246
+ ? this._deps.dbDriverFactory()
57
247
  : require("better-sqlite3-multiple-ciphers");
58
248
  const db = new Driver(dbPath, { readonly: true });
59
249
 
60
250
  try {
61
- const posts = trySelect(db, "SELECT * FROM post ORDER BY created_at DESC LIMIT 5000")
62
- || trySelect(db, "SELECT * FROM status ORDER BY created_at DESC LIMIT 5000") || [];
251
+ const posts =
252
+ trySelect(db, "SELECT * FROM post ORDER BY created_at DESC LIMIT 5000")
253
+ || trySelect(db, "SELECT * FROM status ORDER BY created_at DESC LIMIT 5000")
254
+ || [];
63
255
  for (const row of posts) {
64
256
  yield {
65
257
  adapter: NAME,
66
258
  originalId: `post-${row.id || row.mid || row.idstr}`,
67
259
  capturedAt: parseTime(row.created_at || row.time),
68
- payload: { row, kind: "post" },
260
+ payload: { row, kind: KIND_POST },
69
261
  };
70
262
  }
71
263
 
72
- const searches = trySelect(db, "SELECT * FROM search_history ORDER BY time DESC LIMIT 5000") || [];
264
+ const searches =
265
+ trySelect(db, "SELECT * FROM search_history ORDER BY time DESC LIMIT 5000")
266
+ || [];
73
267
  for (const row of searches) {
74
268
  yield {
75
269
  adapter: NAME,
76
270
  originalId: `search-${row.id || row._id}`,
77
271
  capturedAt: parseTime(row.time || row.create_at),
78
- payload: { row, kind: "search" },
272
+ payload: { row, kind: KIND_SEARCH },
79
273
  };
80
274
  }
81
275
  } finally {
82
- try { db.close(); } catch (_e) {}
276
+ try { db.close(); } catch (_e) { /* ignore */ }
83
277
  }
84
278
  }
85
279
 
86
280
  normalize(raw) {
87
- if (!raw || !raw.payload || !raw.payload.row) {
88
- throw new Error("WeiboAdapter.normalize: row missing");
281
+ if (!raw || !raw.payload) {
282
+ throw new Error("WeiboAdapter.normalize: payload missing");
89
283
  }
90
- const { kind, row } = raw.payload;
91
- const now = Date.now();
92
- const occurredAt = parseTime(row.created_at || row.time) || now;
93
- const source = {
94
- adapter: NAME, adapterVersion: VERSION,
95
- originalId: raw.originalId, capturedAt: occurredAt,
96
- capturedBy: "sqlite",
97
- };
284
+ const ingestedAt = Date.now();
285
+ const kind = raw.kind || raw.payload.kind;
286
+ const p = raw.payload;
98
287
 
99
- if (kind === "search") {
100
- return {
101
- events: [{
102
- id: newId(),
103
- type: "event",
104
- subtype: "interaction",
105
- occurredAt,
106
- actor: "person-self",
107
- content: {
108
- title: `搜索: ${row.keyword || row.query || ""}`,
109
- text: row.keyword || row.query || "",
110
- },
111
- ingestedAt: now,
112
- source,
113
- extra: { query: row.keyword || row.query, fromAdapter: NAME },
114
- }],
115
- persons: [], places: [], items: [], topics: [],
116
- };
288
+ // Sqlite-mode payload carries `row`; snapshot-mode payload carries fields
289
+ // directly. The normalizers below handle both shapes.
290
+ if (kind === KIND_SEARCH) {
291
+ return normalizeSearch(p, raw, ingestedAt);
117
292
  }
118
-
119
- // Post
120
- return {
121
- events: [{
122
- id: newId(),
123
- type: "event",
124
- subtype: "post",
125
- occurredAt,
126
- actor: "person-self",
127
- content: {
128
- title: (row.text || "").slice(0, 80) || "(空)",
129
- text: row.text || "",
130
- },
131
- ingestedAt: now,
132
- source,
133
- extra: {
134
- weiboMid: row.mid || row.id || row.idstr || null,
135
- repostsCount: row.reposts_count || row.repost || 0,
136
- commentsCount: row.comments_count || row.comments || 0,
137
- likesCount: row.attitudes_count || row.likes || 0,
138
- source: row.source || null, // 客户端
139
- location: row.location || row.geo || null,
140
- },
141
- }],
142
- persons: [], places: [], items: [], topics: [],
143
- };
293
+ if (kind === KIND_POST) {
294
+ return normalizePost(p, raw, ingestedAt);
295
+ }
296
+ if (kind === KIND_FAVOURITE) {
297
+ return normalizeFavourite(p, raw, ingestedAt);
298
+ }
299
+ if (kind === KIND_FOLLOW) {
300
+ return normalizeFollow(p, raw, ingestedAt);
301
+ }
302
+ throw new Error(`WeiboAdapter.normalize: unknown kind ${kind}`);
144
303
  }
145
304
  }
146
305
 
147
- function trySelect(db, sql) {
148
- try { return db.prepare(sql).all(); } catch (_e) { return null; }
306
/**
 * Build the provenance (`source`) object attached to normalized entities.
 *
 * @param {object} raw - raw record; supplies `originalId` and `capturedAt`.
 * @param {number} occurredAt - used as `capturedAt` when the record has none.
 * @param {string} [capturedBy] - capture channel; defaults to CAPTURED_BY.SQLITE.
 * @returns {object} `{ adapter, adapterVersion, originalId, capturedAt, capturedBy }`.
 */
function buildSource(raw, occurredAt, capturedBy) {
  return {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: raw.originalId,
    capturedAt: raw.capturedAt || occurredAt,
    capturedBy: capturedBy || CAPTURED_BY.SQLITE,
  };
}
150
315
 
151
- function parseTime(v) {
152
- if (Number.isFinite(v)) return v > 1e12 ? v : v * 1000;
153
- if (typeof v === "string") {
154
- if (/^\d+$/.test(v)) {
155
- const n = parseInt(v, 10);
156
- return n > 1e12 ? n : n * 1000;
157
- }
158
- const t = Date.parse(v);
159
- return Number.isFinite(t) ? t : null;
160
- }
161
- return null;
316
/**
 * Normalize a search-history record into a single interaction event.
 * Sqlite-mode only: the query text is read from `payload.row.keyword`, or
 * `payload.row.query` when keyword is empty.
 *
 * @param {object} p - raw payload (expects `p.row`; a missing row is treated as empty).
 * @param {object} raw - raw record, used for provenance.
 * @param {number} ingestedAt - epoch ms stamped on the event; also the
 *   fallback for `occurredAt` when the row has no parseable time.
 * @returns {object} result with one event and empty persons/places/items/topics.
 */
function normalizeSearch(p, raw, ingestedAt) {
  const row = p.row || {};
  const query = row.keyword || row.query;
  const occurredAt = parseTime(row.time || row.create_at) || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);
  return {
    events: [{
      id: newId(),
      type: ENTITY_TYPES.EVENT,
      subtype: EVENT_SUBTYPES.INTERACTION,
      occurredAt,
      actor: "person-self",
      content: {
        title: `搜索: ${query || ""}`,
        text: query || "",
      },
      ingestedAt,
      source,
      extra: { query, fromAdapter: NAME },
    }],
    persons: [], places: [], items: [], topics: [],
  };
}
339
+
340
/**
 * Normalize a post record into a single post event. Handles both payload
 * shapes:
 *   snapshot mode: { kind:"post", text, mid, source, repostsCount, … } direct
 *   sqlite mode:   { kind:"post", row: { text, mid, ... } }
 *
 * @param {object} p - raw payload in either shape.
 * @param {object} raw - raw record, used for provenance and as a time fallback.
 * @param {number} ingestedAt - epoch ms stamped on the event; last-resort
 *   value for `occurredAt`.
 * @returns {object} result with one event and empty persons/places/items/topics.
 */
function normalizePost(p, raw, ingestedAt) {
  const row = p.row || p;
  const isSnapshot = !p.row;
  const text = row.text || "";

  // Snapshot events carry id as "post-<mid>"; strip that prefix so the event
  // id never ends up in weiboMid verbatim when `mid` itself is absent.
  const SNAPSHOT_ID_PREFIX = "post-";
  const idAsMid =
    isSnapshot && typeof row.id === "string" && row.id.startsWith(SNAPSHOT_ID_PREFIX)
      ? row.id.slice(SNAPSHOT_ID_PREFIX.length)
      : row.id;
  const mid = row.mid || idAsMid || row.idstr || null;

  const occurredAt =
    parseTime(row.created_at || row.createdAt || row.time || raw.capturedAt) ||
    ingestedAt;
  const source = buildSource(
    raw,
    occurredAt,
    isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
  );

  // camelCase counters win when present (even when 0); otherwise fall back to
  // the snake_case / short column names.
  const counter = (camel, ...fallbacks) => {
    if (camel != null) return camel;
    for (const candidate of fallbacks) {
      if (candidate) return candidate;
    }
    return 0;
  };

  return {
    events: [{
      id: newId(),
      type: ENTITY_TYPES.EVENT,
      subtype: EVENT_SUBTYPES.POST,
      occurredAt,
      actor: "person-self",
      content: {
        title: (text || "").slice(0, 80) || "(空)",
        text,
      },
      ingestedAt,
      source,
      extra: {
        weiboMid: mid,
        repostsCount: counter(row.repostsCount, row.reposts_count, row.repost),
        commentsCount: counter(row.commentsCount, row.comments_count, row.comments),
        likesCount: counter(row.likesCount, row.attitudes_count, row.likes),
        picCount: row.picCount || row.pic_num || 0,
        source: row.source || null,
        location: row.location || row.geo || null,
        platform: "weibo",
      },
    }],
    persons: [], places: [], items: [], topics: [],
  };
}
388
+
389
/**
 * Normalize a favourite record into a single "like" event.
 * Snapshot only — sqlite mode has no favourite kind (legacy parser merged
 * favourites into posts pre-A8). Payload: { kind:"favourite", mid, text,
 * capturedAt, authorScreenName }
 *
 * @param {object} p - raw payload with the fields above.
 * @param {object} raw - raw record, used for provenance and as a time fallback.
 * @param {number} ingestedAt - epoch ms stamped on the event; last-resort
 *   value for `occurredAt`.
 * @returns {object} result with one event and empty persons/places/items/topics.
 */
function normalizeFavourite(p, raw, ingestedAt) {
  const text = p.text || "";
  const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.API);
  const event = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.LIKE,
    occurredAt,
    actor: "person-self",
    content: {
      title: (text || "").slice(0, 80) || "(空)",
      text,
    },
    ingestedAt,
    source,
    extra: {
      platform: "weibo",
      weiboMid: p.mid || null,
      authorScreenName: p.authorScreenName || null,
    },
  };
  return {
    events: [event],
    persons: [], places: [], items: [], topics: [],
  };
}
419
+
420
/**
 * Normalize a follow record into a single contact person (no events).
 * Snapshot only — payload: { kind:"follow", uid, screenName, description,
 * avatarUrl, capturedAt }
 *
 * @param {object} p - raw payload with the fields above.
 * @param {object} raw - raw record, used for provenance and as a time fallback.
 * @param {number} ingestedAt - epoch ms stamped on the person; last-resort
 *   value for `followedAt`.
 * @returns {object} result with one person and empty events/places/items/topics.
 */
function normalizeFollow(p, raw, ingestedAt) {
  // A non-zero number or non-empty string uid is used as-is; anything else
  // gets a freshly generated placeholder, so such persons are not stable
  // across runs.
  const followUid =
    (typeof p.uid === "number" && p.uid) ||
    (typeof p.uid === "string" && p.uid.length > 0 && p.uid) ||
    `unknown-${newId()}`;
  const screenName = p.screenName || "(unnamed)";
  const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.API);
  const person = {
    id: `person-weibo-${followUid}`,
    type: ENTITY_TYPES.PERSON,
    subtype: PERSON_SUBTYPES.CONTACT,
    names: [screenName],
    ingestedAt,
    source,
    identifiers: {
      "weibo-uid": [String(followUid)],
    },
    extra: {
      platform: "weibo",
      description: p.description || null,
      avatarUrl: p.avatarUrl || null,
      followedAt: occurredAt,
    },
  };
  return {
    events: [],
    persons: [person],
    places: [],
    items: [],
    topics: [],
  };
}
163
455
 
164
- module.exports = { WeiboAdapter, NAME, VERSION };
456
+ module.exports = {
457
+ WeiboAdapter,
458
+ NAME,
459
+ VERSION,
460
+ SNAPSHOT_SCHEMA_VERSION,
461
+ VALID_SNAPSHOT_KINDS,
462
+ };