@chainlesschain/personal-data-hub 0.4.38 → 0.4.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,13 +57,20 @@ const {
57
57
  } = require("../../constants");
58
58
 
59
59
  const NAME = "social-weibo";
60
- const VERSION = "0.7.0";
60
+ const VERSION = "0.8.0";
61
61
  const SNAPSHOT_SCHEMA_VERSION = 1;
62
62
 
63
63
  const KIND_POST = "post";
64
64
  const KIND_FAVOURITE = "favourite";
65
65
  const KIND_FOLLOW = "follow";
66
66
  const KIND_SEARCH = "search"; // legacy sqlite-mode only
67
+ // Private-message (私信) kinds — read from the sibling `message_<uid>.db`
68
+ // (device-verified schema 2026-06-28: t_buddy/t_session/t_message). Opt-in
69
+ // (opts.includeDm) because DMs are high-sensitivity. See
70
+ // docs/internal/pdh-app-db-schemas.md → 微博 message_<uid>.db.
71
+ const KIND_DM_BUDDY = "dm-buddy"; // t_buddy → PERSON(CONTACT)
72
+ const KIND_DM_SESSION = "dm-session"; // t_session → TOPIC
73
+ const KIND_DM_MESSAGE = "dm-message"; // t_message → EVENT(MESSAGE)
67
74
  const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_POST, KIND_FAVOURITE, KIND_FOLLOW]);
68
75
 
69
76
  function stableOriginalId(kind, id) {
@@ -122,6 +129,9 @@ class WeiboAdapter {
122
129
  "weibo:favourite (mid / text / author)",
123
130
  "weibo:follow (uid / screen_name)",
124
131
  "weibo:search_history (legacy sqlite mode)",
132
+ "weibo:dm-buddy (uid / nick / remark) — HIGH sensitivity, opt-in (includeDm)",
133
+ "weibo:dm-session (session_id / unread) — HIGH sensitivity, opt-in",
134
+ "weibo:dm-message (time / outgoing / content) — HIGH sensitivity, opt-in",
125
135
  ],
126
136
  sensitivity: "medium",
127
137
  legalGate: false,
@@ -129,6 +139,8 @@ class WeiboAdapter {
129
139
  post: true,
130
140
  favourite: true,
131
141
  follow: true,
142
+ // Private messages are off by default — require opts.includeDm:true.
143
+ dm: false,
132
144
  },
133
145
  };
134
146
 
@@ -331,6 +343,78 @@ class WeiboAdapter {
331
343
  } finally {
332
344
  try { db.close(); } catch (_e) { /* ignore */ }
333
345
  }
346
+
347
+ // Private messages live in a SEPARATE sibling DB `message_<uid>.db`.
348
+ // High-sensitivity → opt-in only (opts.includeDm === true).
349
+ if (opts.includeDm === true) {
350
+ yield* this._syncDmMessages(opts, selfUid);
351
+ }
352
+ }
353
+
354
+ // Reads the Weibo private-message DB `message_<uid>.db` (a sibling of the
355
+ // `sina_weibo` file, or opts.messageDbPath). device-verified schema:
356
+ // t_buddy → PERSON (DM contacts: uid/nick/remark/screen_name)
357
+ // t_session → TOPIC (conversation threads: session_id/type/update_time)
358
+ // t_message → EVENT (messages: time/outgoing/content_type/content/sender_id)
359
+ // Columns confirmed against a real populated device (2026-06-28); t_message
360
+ // content encoding is best-effort (no rows on the reference account).
361
+ async *_syncDmMessages(opts, selfUid) {
362
+ const path = require("node:path");
363
+ const baseDbPath = opts.dbPath || this._dbPath;
364
+ const msgDbPath =
365
+ opts.messageDbPath ||
366
+ (baseDbPath
367
+ ? path.join(path.dirname(baseDbPath), `message_${selfUid}.db`)
368
+ : null);
369
+ if (!msgDbPath || !this._deps.fs.existsSync(msgDbPath)) return;
370
+ const Driver = this._deps.dbDriverFactory
371
+ ? this._deps.dbDriverFactory()
372
+ : require("better-sqlite3-multiple-ciphers");
373
+ const db = new Driver(msgDbPath, { readonly: true });
374
+ try {
375
+ // BUDDIES → PERSON
376
+ const buddies = trySelect(db, "SELECT * FROM t_buddy LIMIT 5000") || [];
377
+ for (const row of buddies) {
378
+ if (row.uid == null) continue;
379
+ yield {
380
+ adapter: NAME,
381
+ originalId: `dm-buddy-${row.uid}`,
382
+ capturedAt: Date.now(),
383
+ payload: { row, kind: KIND_DM_BUDDY },
384
+ };
385
+ }
386
+ // SESSIONS → TOPIC
387
+ const sessions =
388
+ trySelect(
389
+ db,
390
+ "SELECT * FROM t_session ORDER BY update_time DESC LIMIT 5000",
391
+ ) || [];
392
+ for (const row of sessions) {
393
+ if (row.session_id == null) continue;
394
+ yield {
395
+ adapter: NAME,
396
+ originalId: `dm-session-${row.session_id}`,
397
+ capturedAt: parseTime(row.update_time) || Date.now(),
398
+ payload: { row, kind: KIND_DM_SESSION },
399
+ };
400
+ }
401
+ // MESSAGES → EVENT (content best-effort; schema device-verified)
402
+ const messages =
403
+ trySelect(
404
+ db,
405
+ "SELECT * FROM t_message ORDER BY time DESC LIMIT 10000",
406
+ ) || [];
407
+ for (const row of messages) {
408
+ yield {
409
+ adapter: NAME,
410
+ originalId: `dm-msg-${row.global_id || row.id}`,
411
+ capturedAt: parseTime(row.time) || Date.now(),
412
+ payload: { row, kind: KIND_DM_MESSAGE },
413
+ };
414
+ }
415
+ } finally {
416
+ try { db.close(); } catch (_e) { /* ignore */ }
417
+ }
334
418
  }
335
419
 
336
420
  normalize(raw) {
@@ -355,6 +439,15 @@ class WeiboAdapter {
355
439
  if (kind === KIND_FOLLOW) {
356
440
  return normalizeFollow(p, raw, ingestedAt);
357
441
  }
442
+ if (kind === KIND_DM_BUDDY) {
443
+ return normalizeDmBuddy(p, raw, ingestedAt);
444
+ }
445
+ if (kind === KIND_DM_SESSION) {
446
+ return normalizeDmSession(p, raw, ingestedAt);
447
+ }
448
+ if (kind === KIND_DM_MESSAGE) {
449
+ return normalizeDmMessage(p, raw, ingestedAt);
450
+ }
358
451
  throw new Error(`WeiboAdapter.normalize: unknown kind ${kind}`);
359
452
  }
360
453
  }
@@ -533,6 +626,93 @@ function normalizeFollow(p, raw, ingestedAt) {
533
626
  };
534
627
  }
535
628
 
629
+ // ─── Private-message (私信) normalizers — device-verified message_<uid>.db ──
630
+
631
/**
 * Converts one DM-buddy payload (a `t_buddy` row) into a PERSON / CONTACT
 * entity.
 *
 * The display name is the first truthy value among `remark`, `screen_name`
 * and `nick`, or "(unnamed)". A row without `uid` gets a generated
 * `unknown-<newId()>` uid. `extra.verified` is `true` only for `1` / `true`
 * and `null` for every other value.
 *
 * @param {{row?: object}} p - Raw payload; `p.row` is the database row.
 * @param {object} raw - Raw record; `raw.capturedAt` is preferred as the
 *   occurrence time.
 * @param {*} ingestedAt - Ingestion timestamp, also the fallback occurrence
 *   time.
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 *   A result holding exactly one person.
 */
function normalizeDmBuddy(p, raw, ingestedAt) {
  const buddy = p.row || {};
  const numberOrNull = (value) => (typeof value === "number" ? value : null);

  let uid;
  if (buddy.uid != null) {
    uid = String(buddy.uid);
  } else {
    uid = `unknown-${newId()}`;
  }

  const displayName =
    buddy.remark || buddy.screen_name || buddy.nick || "(unnamed)";
  const occurredAt = raw.capturedAt || ingestedAt;
  const isVerified = buddy.verified === 1 || buddy.verified === true;

  const extra = {
    platform: "weibo",
    via: "dm",
    gender: buddy.gender == null ? null : buddy.gender,
    verified: isVerified || null,
    follower: numberOrNull(buddy.follower),
    following: numberOrNull(buddy.following),
  };

  const contact = {
    id: `person-weibo-${uid}`,
    type: ENTITY_TYPES.PERSON,
    subtype: PERSON_SUBTYPES.CONTACT,
    names: [String(displayName)],
    ingestedAt,
    source: buildSource(raw, occurredAt, CAPTURED_BY.SQLITE),
    identifiers: { "weibo-uid": [uid] },
    extra,
  };

  return { events: [], persons: [contact], places: [], items: [], topics: [] };
}
656
+
657
/**
 * Converts one DM-session payload (a `t_session` row) into a TOPIC entity
 * representing the conversation thread.
 *
 * The occurrence time is the parsed `update_time` when `parseTime` returns a
 * truthy value, then `raw.capturedAt`, then `ingestedAt`; the same value is
 * stored as `extra.lastUpdate`. A row without `session_id` gets a generated
 * `unknown-<newId()>` id.
 *
 * @param {{row?: object}} p - Raw payload; `p.row` is the database row.
 * @param {object} raw - Raw record; `raw.capturedAt` is a fallback time.
 * @param {*} ingestedAt - Ingestion timestamp, the last-resort time.
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 *   A result holding exactly one topic.
 */
function normalizeDmSession(p, raw, ingestedAt) {
  const session = p.row || {};

  let sid;
  if (session.session_id == null) {
    sid = `unknown-${newId()}`;
  } else {
    sid = String(session.session_id);
  }

  const occurredAt =
    parseTime(session.update_time) || raw.capturedAt || ingestedAt;
  const unreadCount =
    typeof session.im_unread_count === "number"
      ? session.im_unread_count
      : null;

  const thread = {
    id: `topic-weibo-dm-${sid}`,
    type: ENTITY_TYPES.TOPIC,
    name: `微博私信会话 ${sid}`,
    ingestedAt,
    source: buildSource(raw, occurredAt, CAPTURED_BY.SQLITE),
    extra: {
      platform: "weibo",
      via: "dm",
      sessionId: sid,
      sessionType: session.type == null ? null : session.type,
      unread: unreadCount,
      lastUpdate: occurredAt,
    },
  };

  return { events: [], persons: [], places: [], items: [], topics: [thread] };
}
679
+
680
/**
 * Converts one DM-message payload (a `t_message` row) into an EVENT / MESSAGE
 * entity.
 *
 * Text handling:
 *  - `content_type` of null/undefined, 0 or 1 is treated as plain text and
 *    `content` is used when it is a non-empty string.
 *  - Anything else becomes a placeholder: `[type:<content_type>]`, or
 *    `[non-text]` when `content_type` is absent.
 *  - Text longer than 2000 UTF-16 code units is cut and suffixed with "…".
 *    The cut never lands inside a surrogate pair, so an emoji or
 *    supplementary-plane character at the boundary is dropped whole instead
 *    of leaving a lone surrogate in the stored text.
 *
 * @param {{row?: object}} p - Raw payload; `p.row` is the database row.
 * @param {object} raw - Raw record; `raw.capturedAt` is a fallback time.
 * @param {*} ingestedAt - Ingestion timestamp, the last-resort time.
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 *   A result holding exactly one event.
 */
function normalizeDmMessage(p, raw, ingestedAt) {
  const MAX_TEXT_UNITS = 2000;
  const row = p.row || {};
  const occurredAt = parseTime(row.time) || raw.capturedAt || ingestedAt;
  const outgoing = row.outgoing === 1 || row.outgoing === true;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);

  // Content encoding is best-effort: only content types 0/1/null are assumed
  // to carry plain text.
  const isText =
    row.content_type == null || row.content_type === 0 || row.content_type === 1;
  const rawText =
    isText && typeof row.content === "string" && row.content.length > 0
      ? row.content
      : `[${row.content_type != null ? `type:${row.content_type}` : "non-text"}]`;

  let text = rawText;
  if (rawText.length > MAX_TEXT_UNITS) {
    let cut = MAX_TEXT_UNITS;
    // If the last kept unit is a high surrogate, its low half would be cut
    // off; drop the whole pair instead.
    const lastUnit = rawText.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      cut -= 1;
    }
    text = `${rawText.slice(0, cut)}…`;
  }

  const event = {
    id: `event-weibo-dm-${row.global_id || row.id || newId()}`,
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.MESSAGE,
    occurredAt,
    ingestedAt,
    source,
    actor: outgoing ? "self" : "contact",
    content: { text },
    extra: {
      platform: "weibo",
      via: "dm",
      sessionId: row.session_id != null ? String(row.session_id) : null,
      senderId: row.sender_id != null ? String(row.sender_id) : null,
      contentType: row.content_type != null ? row.content_type : null,
      outgoing,
    },
  };
  return { events: [event], persons: [], places: [], items: [], topics: [] };
}
715
+
536
716
  module.exports = {
537
717
  WeiboAdapter,
538
718
  NAME,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chainlesschain/personal-data-hub",
3
- "version": "0.4.38",
3
+ "version": "0.4.39",
4
4
  "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
5
5
  "type": "commonjs",
6
6
  "main": "lib/index.js",