@chainlesschain/personal-data-hub 0.4.34 → 0.4.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -137,16 +137,50 @@ function bodyText(blob) {
|
|
|
137
137
|
* @param self the user's own QQ number (attribution fallback)
|
|
138
138
|
* @returns {Array} event objects ready for vault.putEvent
|
|
139
139
|
*/
|
|
140
|
-
|
|
140
|
+
const SELF_QQ_ID = 'person-qq-self';
|
|
141
|
+
const SRC_QQ = (originalId, at) => ({
|
|
142
|
+
adapter: 'qq-pc', adapterVersion: '0.1.0',
|
|
143
|
+
originalId: originalId || `qq-${at || 0}`,
|
|
144
|
+
capturedAt: at || Date.now(), capturedBy: 'sqlite',
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Parse a decrypted QQNT nt_msg.db into a vault batch `{events, persons, topics}`
|
|
149
|
+
* (mirrors wechat-collect): named contacts (sender nickname 40090), canonical
|
|
150
|
+
* self (sender uid 40020 === matched account uid → person-qq-self), group
|
|
151
|
+
* topics, clean titles, and a UNIQUE source.originalId per person/topic (a
|
|
152
|
+
* shared one collapses every row via the persons (adapter, originalId) index).
|
|
153
|
+
*
|
|
154
|
+
* @param opts {string|{selfUid?:string, self?:string}} — selfUid = the matched
|
|
155
|
+
* account uid (from deriveAndDecrypt) for reliable self attribution; a bare
|
|
156
|
+
* string is the legacy own-QQ-number fallback.
|
|
157
|
+
*/
|
|
158
|
+
function parseEvents(Database, dbPath, opts) {
|
|
159
|
+
const selfUid = opts && typeof opts === 'object' ? opts.selfUid || '' : '';
|
|
160
|
+
const selfQQ = opts && typeof opts === 'object' ? opts.self || '' : opts || '';
|
|
141
161
|
const src = new Database(dbPath, { readonly: true });
|
|
142
162
|
const events = [];
|
|
163
|
+
const persons = new Map();
|
|
164
|
+
const topics = new Map();
|
|
143
165
|
const num = (v) => (typeof v === 'bigint' ? Number(v) : v);
|
|
166
|
+
const addPerson = (qq, uid, nick) => {
|
|
167
|
+
if (!qq) return;
|
|
168
|
+
const id = `person-qq-${qq}`;
|
|
169
|
+
if (persons.has(id)) return;
|
|
170
|
+
const nm = nick && nick.trim() && nick.trim() !== qq ? nick.trim() : null;
|
|
171
|
+
persons.set(id, {
|
|
172
|
+
type: 'person', subtype: 'contact', id,
|
|
173
|
+
names: nm ? [nm, qq] : [qq],
|
|
174
|
+
identifiers: { qq, ...(uid ? { qqUid: uid } : {}) },
|
|
175
|
+
source: SRC_QQ(id), ingestedAt: Date.now(),
|
|
176
|
+
});
|
|
177
|
+
};
|
|
144
178
|
const ingestTable = (table, isGroup) => {
|
|
145
179
|
let rows;
|
|
146
180
|
try {
|
|
147
181
|
rows = src.prepare(
|
|
148
182
|
`SELECT [40001] msgId,[40020] uid,[40011] type,[40033] sender,[40021] peer,` +
|
|
149
|
-
`[40050] t,[40800] body FROM ${table}`,
|
|
183
|
+
`[40050] t,[40090] nick,[40800] body FROM ${table}`,
|
|
150
184
|
).safeIntegers().all();
|
|
151
185
|
} catch { return; }
|
|
152
186
|
for (const r of rows) {
|
|
@@ -160,20 +194,32 @@ function parseEvents(Database, dbPath, self) {
|
|
|
160
194
|
const msgId = typeof r.msgId === 'bigint' ? r.msgId.toString() : String(r.msgId);
|
|
161
195
|
const sender = String(num(r.sender) || '');
|
|
162
196
|
const peer = String(num(r.peer) || '');
|
|
197
|
+
const uid = r.uid ? String(r.uid) : '';
|
|
198
|
+
const nick = r.nick ? String(r.nick) : '';
|
|
163
199
|
const occurredAt = num(r.t) * 1000;
|
|
164
200
|
if (!occurredAt) continue;
|
|
165
|
-
|
|
201
|
+
// Self = the sender's uid is the matched account uid. Map to canonical
|
|
202
|
+
// person-qq-self so analysis excludes the owner from contact rankings.
|
|
203
|
+
const isSelf = !!(selfUid && uid && uid === selfUid);
|
|
204
|
+
const actor = isSelf ? SELF_QQ_ID : (sender ? `person-qq-${sender}` : `person-qq-${selfQQ || 'unknown'}`);
|
|
205
|
+
if (!isSelf && sender) addPerson(sender, uid, nick);
|
|
166
206
|
const participants = [actor];
|
|
167
|
-
|
|
207
|
+
let topicId;
|
|
208
|
+
if (isGroup) {
|
|
209
|
+
topicId = `group-qq-${peer}`;
|
|
210
|
+
participants.push(topicId);
|
|
211
|
+
if (!topics.has(topicId)) topics.set(topicId, { type: 'topic', id: topicId, name: peer, source: SRC_QQ(topicId), ingestedAt: Date.now() });
|
|
212
|
+
} else {
|
|
213
|
+
participants.push(`person-qq-${peer}`);
|
|
214
|
+
}
|
|
215
|
+
const title = text.replace(/\s+/g, ' ').trim().slice(0, 80);
|
|
168
216
|
events.push({
|
|
169
217
|
type: 'event', subtype: 'message', id: `qq:${table}:${msgId}`,
|
|
170
218
|
occurredAt, actor, participants,
|
|
171
|
-
content: { text: isGroup ? `[群${peer}] ${text}` : text },
|
|
172
|
-
topics:
|
|
173
|
-
source: {
|
|
174
|
-
|
|
175
|
-
capturedAt: occurredAt, capturedBy: 'sqlite',
|
|
176
|
-
},
|
|
219
|
+
content: { title: title || '(无内容)', text: isGroup ? `[群${peer}] ${text}` : text },
|
|
220
|
+
topics: topicId ? [topicId] : undefined,
|
|
221
|
+
source: SRC_QQ(`${table}:${msgId}`, occurredAt),
|
|
222
|
+
extra: { isSelf, peer },
|
|
177
223
|
ingestedAt: Date.now(),
|
|
178
224
|
});
|
|
179
225
|
}
|
|
@@ -181,10 +227,11 @@ function parseEvents(Database, dbPath, self) {
|
|
|
181
227
|
try {
|
|
182
228
|
ingestTable('c2c_msg_table', false);
|
|
183
229
|
ingestTable('group_msg_table', true);
|
|
230
|
+
persons.set(SELF_QQ_ID, { type: 'person', subtype: 'contact', id: SELF_QQ_ID, names: ['我(QQ)'], source: SRC_QQ(SELF_QQ_ID), ingestedAt: Date.now() });
|
|
184
231
|
} finally {
|
|
185
232
|
src.close();
|
|
186
233
|
}
|
|
187
|
-
return events;
|
|
234
|
+
return { events, persons: [...persons.values()], topics: [...topics.values()] };
|
|
188
235
|
}
|
|
189
236
|
|
|
190
237
|
module.exports = { extractRand, headerHmac, deriveAndDecrypt, bodyText, parseEvents };
|
|
@@ -84,9 +84,27 @@ function deriveAndDecrypt(raw, passphrases, rawKeys) {
|
|
|
84
84
|
* Parse a DECRYPTED EnMicroMsg.db → vault events (wechat adapter shape).
|
|
85
85
|
* @param Database better-sqlite3 ctor (injected). @param self the user's wxid.
|
|
86
86
|
*/
|
|
87
|
-
|
|
87
|
+
// Self is ALWAYS the stable canonical id (mirrors adapters/wechat/normalize.js)
|
|
88
|
+
// so analysis skills exclude it from contact rankings and it never fragments.
|
|
89
|
+
const SELF_ID = 'person-wechat-self';
|
|
90
|
+
const SRC = (originalId, at) => ({
|
|
91
|
+
adapter: 'wechat', adapterVersion: '0.1.0',
|
|
92
|
+
originalId: originalId || `wechat-${at || 0}`,
|
|
93
|
+
capturedAt: at || Date.now(), capturedBy: 'sqlite',
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Parse a decrypted EnMicroMsg.db into a vault batch. Returns
|
|
98
|
+
* `{ events, persons, topics }` so the on-device analysis skills get the rich
|
|
99
|
+
* entity graph (named contacts → relations; group topics → interests; clean
|
|
100
|
+
* titles → timeline) instead of bare message events. `self` is ignored — the
|
|
101
|
+
* sender of an outbound message maps to the canonical SELF_ID.
|
|
102
|
+
*/
|
|
103
|
+
function parseEvents(Database, dbPath, _self) {
|
|
88
104
|
const src = new Database(dbPath, { readonly: true });
|
|
89
105
|
const events = [];
|
|
106
|
+
const persons = new Map(); // id -> person record
|
|
107
|
+
const topics = new Map(); // id -> topic record
|
|
90
108
|
try {
|
|
91
109
|
const nameOf = new Map();
|
|
92
110
|
try {
|
|
@@ -94,6 +112,17 @@ function parseEvents(Database, dbPath, self) {
|
|
|
94
112
|
nameOf.set(r.username, (r.conRemark && r.conRemark.trim()) || r.nickname || r.username);
|
|
95
113
|
}
|
|
96
114
|
} catch { /* contacts optional */ }
|
|
115
|
+
const addPerson = (wxid) => {
|
|
116
|
+
if (!wxid) return;
|
|
117
|
+
const id = `person-wechat-${wxid}`;
|
|
118
|
+
if (persons.has(id)) return;
|
|
119
|
+
const nm = nameOf.get(wxid);
|
|
120
|
+
// names[0] = display name (or wxid when unresolved); keep wxid as alias.
|
|
121
|
+
const names = nm && nm !== wxid ? [nm, wxid] : [wxid];
|
|
122
|
+
// Unique originalId per person — a shared originalId collapses every row
|
|
123
|
+
// into one via the persons (adapter, originalId) unique constraint.
|
|
124
|
+
persons.set(id, { type: 'person', subtype: 'contact', id, names, identifiers: { wechatId: wxid }, source: SRC(id), ingestedAt: Date.now() });
|
|
125
|
+
};
|
|
97
126
|
const rows = src.prepare(
|
|
98
127
|
'SELECT msgId,type,isSend,createTime,talker,content FROM message ' +
|
|
99
128
|
"WHERE type=1 ORDER BY createTime DESC LIMIT 5000",
|
|
@@ -101,7 +130,7 @@ function parseEvents(Database, dbPath, self) {
|
|
|
101
130
|
for (const r of rows) {
|
|
102
131
|
const isGroup = /@chatroom$/.test(r.talker || '');
|
|
103
132
|
let text = r.content || '';
|
|
104
|
-
let senderWxid = r.isSend ?
|
|
133
|
+
let senderWxid = r.isSend ? null : r.talker; // null = self (outbound)
|
|
105
134
|
if (isGroup && !r.isSend) {
|
|
106
135
|
const c = text.indexOf(':');
|
|
107
136
|
if (c > 0) { senderWxid = text.slice(0, c); text = text.slice(c + 1).replace(/^\n/, '').trim(); }
|
|
@@ -110,25 +139,36 @@ function parseEvents(Database, dbPath, self) {
|
|
|
110
139
|
const occurredAt = Number(r.createTime) || 0; // already ms in WeChat
|
|
111
140
|
if (!occurredAt) continue;
|
|
112
141
|
const peer = String(r.talker || '');
|
|
113
|
-
const actor = `person-wechat-${senderWxid ||
|
|
142
|
+
const actor = r.isSend ? SELF_ID : `person-wechat-${senderWxid || peer}`;
|
|
143
|
+
if (!r.isSend) addPerson(senderWxid || peer);
|
|
114
144
|
const participants = [actor];
|
|
115
|
-
|
|
145
|
+
let topicId;
|
|
146
|
+
if (isGroup) {
|
|
147
|
+
topicId = `group-wechat-${peer}`;
|
|
148
|
+
participants.push(topicId);
|
|
149
|
+
if (!topics.has(topicId)) {
|
|
150
|
+
topics.set(topicId, { type: 'topic', id: topicId, name: nameOf.get(peer) || peer.replace('@chatroom', ''), source: SRC(topicId), ingestedAt: Date.now() });
|
|
151
|
+
}
|
|
152
|
+
} else {
|
|
153
|
+
addPerson(peer);
|
|
154
|
+
participants.push(`person-wechat-${peer}`);
|
|
155
|
+
}
|
|
156
|
+
const title = text.replace(/\s+/g, ' ').trim().slice(0, 80);
|
|
116
157
|
events.push({
|
|
117
158
|
type: 'event', subtype: 'message', id: `wechat:${r.msgId}`,
|
|
118
159
|
occurredAt, actor, participants,
|
|
119
|
-
content: { text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
|
|
120
|
-
topics:
|
|
121
|
-
source:
|
|
122
|
-
|
|
123
|
-
capturedAt: occurredAt, capturedBy: 'sqlite',
|
|
124
|
-
},
|
|
160
|
+
content: { title: title || '(无内容)', text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
|
|
161
|
+
topics: topicId ? [topicId] : undefined,
|
|
162
|
+
source: SRC(String(r.msgId), occurredAt),
|
|
163
|
+
extra: { isSend: !!r.isSend, talker: r.talker },
|
|
125
164
|
ingestedAt: Date.now(),
|
|
126
165
|
});
|
|
127
166
|
}
|
|
167
|
+
persons.set(SELF_ID, { type: 'person', subtype: 'contact', id: SELF_ID, names: ['我(微信)'], source: SRC(SELF_ID), ingestedAt: Date.now() });
|
|
128
168
|
} finally {
|
|
129
169
|
src.close();
|
|
130
170
|
}
|
|
131
|
-
return events;
|
|
171
|
+
return { events, persons: [...persons.values()], topics: [...topics.values()] };
|
|
132
172
|
}
|
|
133
173
|
|
|
134
174
|
module.exports = { computeKeyCandidates, deriveAndDecrypt, parseEvents };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.4.34",
|
|
3
|
+
"version": "0.4.35",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|