@chainlesschain/personal-data-hub 0.4.34 → 0.4.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -137,16 +137,50 @@ function bodyText(blob) {
137
137
  * @param self the user's own QQ number (attribution fallback)
138
138
  * @returns {Array} event objects ready for vault.putEvent
139
139
  */
140
- function parseEvents(Database, dbPath, self) {
140
+ const SELF_QQ_ID = 'person-qq-self';
141
+ const SRC_QQ = (originalId, at) => ({
142
+ adapter: 'qq-pc', adapterVersion: '0.1.0',
143
+ originalId: originalId || `qq-${at || 0}`,
144
+ capturedAt: at || Date.now(), capturedBy: 'sqlite',
145
+ });
146
+
147
+ /**
148
+ * Parse a decrypted QQNT nt_msg.db into a vault batch `{events, persons, topics}`
149
+ * (mirrors wechat-collect): named contacts (sender nickname 40090), canonical
150
+ * self (sender uid 40020 === matched account uid → person-qq-self), group
151
+ * topics, clean titles, and a UNIQUE source.originalId per person/topic (a
152
+ * shared one collapses every row via the persons (adapter, originalId) index).
153
+ *
154
+ * @param opts {string|{selfUid?:string, self?:string}} — selfUid = the matched
155
+ * account uid (from deriveAndDecrypt) for reliable self attribution; a bare
156
+ * string is the legacy own-QQ-number fallback.
157
+ */
158
+ function parseEvents(Database, dbPath, opts) {
159
+ const selfUid = opts && typeof opts === 'object' ? opts.selfUid || '' : '';
160
+ const selfQQ = opts && typeof opts === 'object' ? opts.self || '' : opts || '';
141
161
  const src = new Database(dbPath, { readonly: true });
142
162
  const events = [];
163
+ const persons = new Map();
164
+ const topics = new Map();
143
165
  const num = (v) => (typeof v === 'bigint' ? Number(v) : v);
166
+ const addPerson = (qq, uid, nick) => {
167
+ if (!qq) return;
168
+ const id = `person-qq-${qq}`;
169
+ if (persons.has(id)) return;
170
+ const nm = nick && nick.trim() && nick.trim() !== qq ? nick.trim() : null;
171
+ persons.set(id, {
172
+ type: 'person', subtype: 'contact', id,
173
+ names: nm ? [nm, qq] : [qq],
174
+ identifiers: { qq, ...(uid ? { qqUid: uid } : {}) },
175
+ source: SRC_QQ(id), ingestedAt: Date.now(),
176
+ });
177
+ };
144
178
  const ingestTable = (table, isGroup) => {
145
179
  let rows;
146
180
  try {
147
181
  rows = src.prepare(
148
182
  `SELECT [40001] msgId,[40020] uid,[40011] type,[40033] sender,[40021] peer,` +
149
- `[40050] t,[40800] body FROM ${table}`,
183
+ `[40050] t,[40090] nick,[40800] body FROM ${table}`,
150
184
  ).safeIntegers().all();
151
185
  } catch { return; }
152
186
  for (const r of rows) {
@@ -160,20 +194,32 @@ function parseEvents(Database, dbPath, self) {
160
194
  const msgId = typeof r.msgId === 'bigint' ? r.msgId.toString() : String(r.msgId);
161
195
  const sender = String(num(r.sender) || '');
162
196
  const peer = String(num(r.peer) || '');
197
+ const uid = r.uid ? String(r.uid) : '';
198
+ const nick = r.nick ? String(r.nick) : '';
163
199
  const occurredAt = num(r.t) * 1000;
164
200
  if (!occurredAt) continue;
165
- const actor = sender ? `person-qq-${sender}` : `person-qq-${self}`;
201
+ // Self = the sender's uid is the matched account uid. Map to canonical
202
+ // person-qq-self so analysis excludes the owner from contact rankings.
203
+ const isSelf = !!(selfUid && uid && uid === selfUid);
204
+ const actor = isSelf ? SELF_QQ_ID : (sender ? `person-qq-${sender}` : `person-qq-${selfQQ || 'unknown'}`);
205
+ if (!isSelf && sender) addPerson(sender, uid, nick);
166
206
  const participants = [actor];
167
- participants.push(isGroup ? `group-qq-${peer}` : `person-qq-${peer}`);
207
+ let topicId;
208
+ if (isGroup) {
209
+ topicId = `group-qq-${peer}`;
210
+ participants.push(topicId);
211
+ if (!topics.has(topicId)) topics.set(topicId, { type: 'topic', id: topicId, name: peer, source: SRC_QQ(topicId), ingestedAt: Date.now() });
212
+ } else {
213
+ participants.push(`person-qq-${peer}`);
214
+ }
215
+ const title = text.replace(/\s+/g, ' ').trim().slice(0, 80);
168
216
  events.push({
169
217
  type: 'event', subtype: 'message', id: `qq:${table}:${msgId}`,
170
218
  occurredAt, actor, participants,
171
- content: { text: isGroup ? `[群${peer}] ${text}` : text },
172
- topics: isGroup ? [`group-qq-${peer}`] : undefined,
173
- source: {
174
- adapter: 'qq-pc', adapterVersion: '0.1.0', originalId: `${table}:${msgId}`,
175
- capturedAt: occurredAt, capturedBy: 'sqlite',
176
- },
219
+ content: { title: title || '(无内容)', text: isGroup ? `[群${peer}] ${text}` : text },
220
+ topics: topicId ? [topicId] : undefined,
221
+ source: SRC_QQ(`${table}:${msgId}`, occurredAt),
222
+ extra: { isSelf, peer },
177
223
  ingestedAt: Date.now(),
178
224
  });
179
225
  }
@@ -181,10 +227,11 @@ function parseEvents(Database, dbPath, self) {
181
227
  try {
182
228
  ingestTable('c2c_msg_table', false);
183
229
  ingestTable('group_msg_table', true);
230
+ persons.set(SELF_QQ_ID, { type: 'person', subtype: 'contact', id: SELF_QQ_ID, names: ['我(QQ)'], source: SRC_QQ(SELF_QQ_ID), ingestedAt: Date.now() });
184
231
  } finally {
185
232
  src.close();
186
233
  }
187
- return events;
234
+ return { events, persons: [...persons.values()], topics: [...topics.values()] };
188
235
  }
189
236
 
190
237
  module.exports = { extractRand, headerHmac, deriveAndDecrypt, bodyText, parseEvents };
@@ -84,9 +84,27 @@ function deriveAndDecrypt(raw, passphrases, rawKeys) {
84
84
  * Parse a DECRYPTED EnMicroMsg.db → vault events (wechat adapter shape).
85
85
  * @param Database better-sqlite3 ctor (injected). @param self the user's wxid.
86
86
  */
87
- function parseEvents(Database, dbPath, self) {
87
+ // Self is ALWAYS the stable canonical id (mirrors adapters/wechat/normalize.js)
88
+ // so analysis skills exclude it from contact rankings and it never fragments.
89
+ const SELF_ID = 'person-wechat-self';
90
+ const SRC = (originalId, at) => ({
91
+ adapter: 'wechat', adapterVersion: '0.1.0',
92
+ originalId: originalId || `wechat-${at || 0}`,
93
+ capturedAt: at || Date.now(), capturedBy: 'sqlite',
94
+ });
95
+
96
+ /**
97
+ * Parse a decrypted EnMicroMsg.db into a vault batch. Returns
98
+ * `{ events, persons, topics }` so the on-device analysis skills get the rich
99
+ * entity graph (named contacts → relations; group topics → interests; clean
100
+ * titles → timeline) instead of bare message events. `self` is ignored — the
101
+ * sender of an outbound message maps to the canonical SELF_ID.
102
+ */
103
+ function parseEvents(Database, dbPath, _self) {
88
104
  const src = new Database(dbPath, { readonly: true });
89
105
  const events = [];
106
+ const persons = new Map(); // id -> person record
107
+ const topics = new Map(); // id -> topic record
90
108
  try {
91
109
  const nameOf = new Map();
92
110
  try {
@@ -94,6 +112,17 @@ function parseEvents(Database, dbPath, self) {
94
112
  nameOf.set(r.username, (r.conRemark && r.conRemark.trim()) || r.nickname || r.username);
95
113
  }
96
114
  } catch { /* contacts optional */ }
115
+ const addPerson = (wxid) => {
116
+ if (!wxid) return;
117
+ const id = `person-wechat-${wxid}`;
118
+ if (persons.has(id)) return;
119
+ const nm = nameOf.get(wxid);
120
+ // names[0] = display name (or wxid when unresolved); keep wxid as alias.
121
+ const names = nm && nm !== wxid ? [nm, wxid] : [wxid];
122
+ // Unique originalId per person — a shared originalId collapses every row
123
+ // into one via the persons (adapter, originalId) unique constraint.
124
+ persons.set(id, { type: 'person', subtype: 'contact', id, names, identifiers: { wechatId: wxid }, source: SRC(id), ingestedAt: Date.now() });
125
+ };
97
126
  const rows = src.prepare(
98
127
  'SELECT msgId,type,isSend,createTime,talker,content FROM message ' +
99
128
  "WHERE type=1 ORDER BY createTime DESC LIMIT 5000",
@@ -101,7 +130,7 @@ function parseEvents(Database, dbPath, self) {
101
130
  for (const r of rows) {
102
131
  const isGroup = /@chatroom$/.test(r.talker || '');
103
132
  let text = r.content || '';
104
- let senderWxid = r.isSend ? self : r.talker;
133
+ let senderWxid = r.isSend ? null : r.talker; // null = self (outbound)
105
134
  if (isGroup && !r.isSend) {
106
135
  const c = text.indexOf(':');
107
136
  if (c > 0) { senderWxid = text.slice(0, c); text = text.slice(c + 1).replace(/^\n/, '').trim(); }
@@ -110,25 +139,36 @@ function parseEvents(Database, dbPath, self) {
110
139
  const occurredAt = Number(r.createTime) || 0; // already ms in WeChat
111
140
  if (!occurredAt) continue;
112
141
  const peer = String(r.talker || '');
113
- const actor = `person-wechat-${senderWxid || self}`;
142
+ const actor = r.isSend ? SELF_ID : `person-wechat-${senderWxid || peer}`;
143
+ if (!r.isSend) addPerson(senderWxid || peer);
114
144
  const participants = [actor];
115
- participants.push(isGroup ? `group-wechat-${peer}` : `person-wechat-${peer}`);
145
+ let topicId;
146
+ if (isGroup) {
147
+ topicId = `group-wechat-${peer}`;
148
+ participants.push(topicId);
149
+ if (!topics.has(topicId)) {
150
+ topics.set(topicId, { type: 'topic', id: topicId, name: nameOf.get(peer) || peer.replace('@chatroom', ''), source: SRC(topicId), ingestedAt: Date.now() });
151
+ }
152
+ } else {
153
+ addPerson(peer);
154
+ participants.push(`person-wechat-${peer}`);
155
+ }
156
+ const title = text.replace(/\s+/g, ' ').trim().slice(0, 80);
116
157
  events.push({
117
158
  type: 'event', subtype: 'message', id: `wechat:${r.msgId}`,
118
159
  occurredAt, actor, participants,
119
- content: { text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
120
- topics: isGroup ? [`group-wechat-${peer}`] : undefined,
121
- source: {
122
- adapter: 'wechat', adapterVersion: '0.1.0', originalId: String(r.msgId),
123
- capturedAt: occurredAt, capturedBy: 'sqlite',
124
- },
160
+ content: { title: title || '(无内容)', text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
161
+ topics: topicId ? [topicId] : undefined,
162
+ source: SRC(String(r.msgId), occurredAt),
163
+ extra: { isSend: !!r.isSend, talker: r.talker },
125
164
  ingestedAt: Date.now(),
126
165
  });
127
166
  }
167
+ persons.set(SELF_ID, { type: 'person', subtype: 'contact', id: SELF_ID, names: ['我(微信)'], source: SRC(SELF_ID), ingestedAt: Date.now() });
128
168
  } finally {
129
169
  src.close();
130
170
  }
131
- return events;
171
+ return { events, persons: [...persons.values()], topics: [...topics.values()] };
132
172
  }
133
173
 
134
174
  module.exports = { computeKeyCandidates, deriveAndDecrypt, parseEvents };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chainlesschain/personal-data-hub",
3
- "version": "0.4.34",
3
+ "version": "0.4.35",
4
4
  "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
5
5
  "type": "commonjs",
6
6
  "main": "lib/index.js",