@chainlesschain/personal-data-hub 0.4.38 → 0.4.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -57,13 +57,20 @@ const {
|
|
|
57
57
|
} = require("../../constants");
|
|
58
58
|
|
|
59
59
|
const NAME = "social-weibo";
|
|
60
|
-
const VERSION = "0.
|
|
60
|
+
const VERSION = "0.8.0";
|
|
61
61
|
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
62
62
|
|
|
63
63
|
const KIND_POST = "post";
|
|
64
64
|
const KIND_FAVOURITE = "favourite";
|
|
65
65
|
const KIND_FOLLOW = "follow";
|
|
66
66
|
const KIND_SEARCH = "search"; // legacy sqlite-mode only
|
|
67
|
+
// Private-message (私信) kinds — read from the sibling `message_<uid>.db`
|
|
68
|
+
// (device-verified schema 2026-06-28: t_buddy/t_session/t_message). Opt-in
|
|
69
|
+
// (opts.includeDm) because DMs are high-sensitivity. See
|
|
70
|
+
// docs/internal/pdh-app-db-schemas.md → 微博 message_<uid>.db.
|
|
71
|
+
const KIND_DM_BUDDY = "dm-buddy"; // t_buddy → PERSON(CONTACT)
|
|
72
|
+
const KIND_DM_SESSION = "dm-session"; // t_session → TOPIC
|
|
73
|
+
const KIND_DM_MESSAGE = "dm-message"; // t_message → EVENT(MESSAGE)
|
|
67
74
|
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_POST, KIND_FAVOURITE, KIND_FOLLOW]);
|
|
68
75
|
|
|
69
76
|
function stableOriginalId(kind, id) {
|
|
@@ -122,6 +129,9 @@ class WeiboAdapter {
|
|
|
122
129
|
"weibo:favourite (mid / text / author)",
|
|
123
130
|
"weibo:follow (uid / screen_name)",
|
|
124
131
|
"weibo:search_history (legacy sqlite mode)",
|
|
132
|
+
"weibo:dm-buddy (uid / nick / remark) — HIGH sensitivity, opt-in (includeDm)",
|
|
133
|
+
"weibo:dm-session (session_id / unread) — HIGH sensitivity, opt-in",
|
|
134
|
+
"weibo:dm-message (time / outgoing / content) — HIGH sensitivity, opt-in",
|
|
125
135
|
],
|
|
126
136
|
sensitivity: "medium",
|
|
127
137
|
legalGate: false,
|
|
@@ -129,6 +139,8 @@ class WeiboAdapter {
|
|
|
129
139
|
post: true,
|
|
130
140
|
favourite: true,
|
|
131
141
|
follow: true,
|
|
142
|
+
// Private messages are off by default — require opts.includeDm:true.
|
|
143
|
+
dm: false,
|
|
132
144
|
},
|
|
133
145
|
};
|
|
134
146
|
|
|
@@ -331,6 +343,78 @@ class WeiboAdapter {
|
|
|
331
343
|
} finally {
|
|
332
344
|
try { db.close(); } catch (_e) { /* ignore */ }
|
|
333
345
|
}
|
|
346
|
+
|
|
347
|
+
// Private messages live in a SEPARATE sibling DB `message_<uid>.db`.
|
|
348
|
+
// High-sensitivity → opt-in only (opts.includeDm === true).
|
|
349
|
+
if (opts.includeDm === true) {
|
|
350
|
+
yield* this._syncDmMessages(opts, selfUid);
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
// Reads the Weibo private-message DB `message_<uid>.db` (a sibling of the
|
|
355
|
+
// `sina_weibo` file, or opts.messageDbPath). device-verified schema:
|
|
356
|
+
// t_buddy → PERSON (DM contacts: uid/nick/remark/screen_name)
|
|
357
|
+
// t_session → TOPIC (conversation threads: session_id/type/update_time)
|
|
358
|
+
// t_message → EVENT (messages: time/outgoing/content_type/content/sender_id)
|
|
359
|
+
// Columns confirmed against a real populated device (2026-06-28); t_message
|
|
360
|
+
// content encoding is best-effort (no rows on the reference account).
|
|
361
|
+
async *_syncDmMessages(opts, selfUid) {
|
|
362
|
+
const path = require("node:path");
|
|
363
|
+
const baseDbPath = opts.dbPath || this._dbPath;
|
|
364
|
+
const msgDbPath =
|
|
365
|
+
opts.messageDbPath ||
|
|
366
|
+
(baseDbPath
|
|
367
|
+
? path.join(path.dirname(baseDbPath), `message_${selfUid}.db`)
|
|
368
|
+
: null);
|
|
369
|
+
if (!msgDbPath || !this._deps.fs.existsSync(msgDbPath)) return;
|
|
370
|
+
const Driver = this._deps.dbDriverFactory
|
|
371
|
+
? this._deps.dbDriverFactory()
|
|
372
|
+
: require("better-sqlite3-multiple-ciphers");
|
|
373
|
+
const db = new Driver(msgDbPath, { readonly: true });
|
|
374
|
+
try {
|
|
375
|
+
// BUDDIES → PERSON
|
|
376
|
+
const buddies = trySelect(db, "SELECT * FROM t_buddy LIMIT 5000") || [];
|
|
377
|
+
for (const row of buddies) {
|
|
378
|
+
if (row.uid == null) continue;
|
|
379
|
+
yield {
|
|
380
|
+
adapter: NAME,
|
|
381
|
+
originalId: `dm-buddy-${row.uid}`,
|
|
382
|
+
capturedAt: Date.now(),
|
|
383
|
+
payload: { row, kind: KIND_DM_BUDDY },
|
|
384
|
+
};
|
|
385
|
+
}
|
|
386
|
+
// SESSIONS → TOPIC
|
|
387
|
+
const sessions =
|
|
388
|
+
trySelect(
|
|
389
|
+
db,
|
|
390
|
+
"SELECT * FROM t_session ORDER BY update_time DESC LIMIT 5000",
|
|
391
|
+
) || [];
|
|
392
|
+
for (const row of sessions) {
|
|
393
|
+
if (row.session_id == null) continue;
|
|
394
|
+
yield {
|
|
395
|
+
adapter: NAME,
|
|
396
|
+
originalId: `dm-session-${row.session_id}`,
|
|
397
|
+
capturedAt: parseTime(row.update_time) || Date.now(),
|
|
398
|
+
payload: { row, kind: KIND_DM_SESSION },
|
|
399
|
+
};
|
|
400
|
+
}
|
|
401
|
+
// MESSAGES → EVENT (content best-effort; schema device-verified)
|
|
402
|
+
const messages =
|
|
403
|
+
trySelect(
|
|
404
|
+
db,
|
|
405
|
+
"SELECT * FROM t_message ORDER BY time DESC LIMIT 10000",
|
|
406
|
+
) || [];
|
|
407
|
+
for (const row of messages) {
|
|
408
|
+
yield {
|
|
409
|
+
adapter: NAME,
|
|
410
|
+
originalId: `dm-msg-${row.global_id || row.id}`,
|
|
411
|
+
capturedAt: parseTime(row.time) || Date.now(),
|
|
412
|
+
payload: { row, kind: KIND_DM_MESSAGE },
|
|
413
|
+
};
|
|
414
|
+
}
|
|
415
|
+
} finally {
|
|
416
|
+
try { db.close(); } catch (_e) { /* ignore */ }
|
|
417
|
+
}
|
|
334
418
|
}
|
|
335
419
|
|
|
336
420
|
normalize(raw) {
|
|
@@ -355,6 +439,15 @@ class WeiboAdapter {
|
|
|
355
439
|
if (kind === KIND_FOLLOW) {
|
|
356
440
|
return normalizeFollow(p, raw, ingestedAt);
|
|
357
441
|
}
|
|
442
|
+
if (kind === KIND_DM_BUDDY) {
|
|
443
|
+
return normalizeDmBuddy(p, raw, ingestedAt);
|
|
444
|
+
}
|
|
445
|
+
if (kind === KIND_DM_SESSION) {
|
|
446
|
+
return normalizeDmSession(p, raw, ingestedAt);
|
|
447
|
+
}
|
|
448
|
+
if (kind === KIND_DM_MESSAGE) {
|
|
449
|
+
return normalizeDmMessage(p, raw, ingestedAt);
|
|
450
|
+
}
|
|
358
451
|
throw new Error(`WeiboAdapter.normalize: unknown kind ${kind}`);
|
|
359
452
|
}
|
|
360
453
|
}
|
|
@@ -533,6 +626,93 @@ function normalizeFollow(p, raw, ingestedAt) {
|
|
|
533
626
|
};
|
|
534
627
|
}
|
|
535
628
|
|
|
629
|
+
// ─── Private-message (私信) normalizers — device-verified message_<uid>.db ──
|
|
630
|
+
|
|
631
|
+
/**
 * Converts one DM-contact row (`t_buddy`) into a PERSON entity of subtype
 * CONTACT.
 *
 * @param {object} p - Raw payload; `p.row` is the buddy row (treated as `{}` when absent).
 * @param {object} raw - Raw record; `raw.capturedAt` is read and the object is
 *   handed to `buildSource`.
 * @param {*} ingestedAt - Ingestion time; stored on the entity and used as the
 *   occurrence time when `raw.capturedAt` is falsy.
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 *   Normalized bundle containing exactly one person.
 */
function normalizeDmBuddy(p, raw, ingestedAt) {
  const row = p.row || {};
  const uid = row.uid != null ? String(row.uid) : `unknown-${newId()}`;

  // Display-name preference: remark, then screen_name, then nick. Falsy and
  // whitespace-only candidates are skipped so a blank remark cannot hide a
  // real screen name.
  const displayName = [row.remark, row.screen_name, row.nick]
    .map((candidate) => (candidate ? String(candidate).trim() : ""))
    .find((candidate) => candidate.length > 0);
  const name = displayName || "(unnamed)";

  // Tri-state flag: true / false when the column says so, null when the
  // value is absent or unrecognised. (Previously an explicit 0/false was
  // collapsed into null.)
  let verified = null;
  if (row.verified === 1 || row.verified === true) {
    verified = true;
  } else if (row.verified === 0 || row.verified === false) {
    verified = false;
  }

  const occurredAt = raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);
  const person = {
    id: `person-weibo-${uid}`,
    type: ENTITY_TYPES.PERSON,
    subtype: PERSON_SUBTYPES.CONTACT,
    names: [String(name)],
    ingestedAt,
    source,
    identifiers: { "weibo-uid": [uid] },
    extra: {
      platform: "weibo",
      via: "dm",
      gender: row.gender != null ? row.gender : null,
      verified,
      follower: typeof row.follower === "number" ? row.follower : null,
      following: typeof row.following === "number" ? row.following : null,
    },
  };
  return { events: [], persons: [person], places: [], items: [], topics: [] };
}
|
|
656
|
+
|
|
657
|
+
/**
 * Converts one DM conversation row (`t_session`) into a TOPIC entity.
 *
 * @param {object} p - Raw payload; `p.row` is the session row (treated as `{}` when absent).
 * @param {object} raw - Raw record; `raw.capturedAt` is read and the object is
 *   handed to `buildSource`.
 * @param {*} ingestedAt - Ingestion time; stored on the entity and used as the
 *   last-resort occurrence time.
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 *   Normalized bundle containing exactly one topic.
 */
function normalizeDmSession(p, raw, ingestedAt) {
  const row = p.row || {};
  const sid = row.session_id != null ? String(row.session_id) : `unknown-${newId()}`;

  // The session's own timestamp, or null when the row carries none that
  // parseTime accepts.
  const updatedAt = parseTime(row.update_time) || null;
  // Provenance still needs some time value, so fall back to capture/ingest time.
  const occurredAt = updatedAt || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);

  const topic = {
    id: `topic-weibo-dm-${sid}`,
    type: ENTITY_TYPES.TOPIC,
    name: `微博私信会话 ${sid}`,
    ingestedAt,
    source,
    extra: {
      platform: "weibo",
      via: "dm",
      sessionId: sid,
      sessionType: row.type != null ? row.type : null,
      unread: typeof row.im_unread_count === "number" ? row.im_unread_count : null,
      // Only the row's real update time is reported here; the capture/ingest
      // fallback is not presented as the conversation's last activity.
      lastUpdate: updatedAt,
    },
  };
  return { events: [], persons: [], places: [], items: [], topics: [topic] };
}
|
|
679
|
+
|
|
680
|
+
/**
 * Converts one private-message row (`t_message`) into an EVENT entity of
 * subtype MESSAGE.
 *
 * Text handling: rows whose `content_type` is null/undefined, 0 or 1 and whose
 * `content` is a non-empty string keep that string; every other row gets a
 * placeholder (`[type:<content_type>]` or `[non-text]`). Text longer than
 * 2000 UTF-16 code units is cut and suffixed with "…".
 *
 * @param {object} p - Raw payload; `p.row` is the message row (treated as `{}` when absent).
 * @param {object} raw - Raw record; `raw.capturedAt` is read and the object is
 *   handed to `buildSource`.
 * @param {*} ingestedAt - Ingestion time; stored on the entity and used as the
 *   last-resort occurrence time.
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 *   Normalized bundle containing exactly one event.
 */
function normalizeDmMessage(p, raw, ingestedAt) {
  const MAX_TEXT_UNITS = 2000;
  const row = p.row || {};
  const occurredAt = parseTime(row.time) || raw.capturedAt || ingestedAt;
  const outgoing = row.outgoing === 1 || row.outgoing === true;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);

  // content is plain text for text messages (content_type 0/1/null); other
  // types carry structured/empty content → emit a typed placeholder.
  const isText =
    row.content_type == null || row.content_type === 0 || row.content_type === 1;
  const rawText =
    isText && typeof row.content === "string" && row.content.length > 0
      ? row.content
      : `[${row.content_type != null ? `type:${row.content_type}` : "non-text"}]`;

  let text = rawText;
  if (rawText.length > MAX_TEXT_UNITS) {
    // Strings are indexed in UTF-16 code units. If the last kept unit is a
    // high surrogate, cutting there would strand half of an astral character
    // (e.g. an emoji), so back off by one unit.
    const lastKept = rawText.charCodeAt(MAX_TEXT_UNITS - 1);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
    const cutAt = splitsPair ? MAX_TEXT_UNITS - 1 : MAX_TEXT_UNITS;
    text = `${rawText.slice(0, cutAt)}…`;
  }

  const event = {
    // Falsy global_id falls back to the local row id, then to a fresh id.
    id: `event-weibo-dm-${row.global_id || row.id || newId()}`,
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.MESSAGE,
    occurredAt,
    ingestedAt,
    source,
    actor: outgoing ? "self" : "contact",
    content: { text },
    extra: {
      platform: "weibo",
      via: "dm",
      sessionId: row.session_id != null ? String(row.session_id) : null,
      senderId: row.sender_id != null ? String(row.sender_id) : null,
      contentType: row.content_type != null ? row.content_type : null,
      outgoing,
    },
  };
  return { events: [event], persons: [], places: [], items: [], topics: [] };
}
|
|
715
|
+
|
|
536
716
|
module.exports = {
|
|
537
717
|
WeiboAdapter,
|
|
538
718
|
NAME,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.4.38",
|
|
3
|
+
"version": "0.4.39",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|