@chainlesschain/personal-data-hub 0.4.27 → 0.4.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/wechat-pc-group-topic.test.js +63 -0
- package/__tests__/analysis-skills.test.js +54 -0
- package/lib/adapters/wechat-pc/index.js +24 -1
- package/lib/analysis-skills/interests.js +34 -14
- package/lib/analysis-skills/timeline.js +6 -1
- package/lib/vault.js +12 -0
- package/package.json +1 -1
|
package/__tests__/adapters/wechat-pc-group-topic.test.js
ADDED
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
|
|
5
|
+
const { WeChatPcAdapter } = require("../../lib/adapters/wechat-pc");
|
|
6
|
+
|
|
7
|
+
// Build the raw envelope shape that WeChatPcAdapter.normalize() consumes for a
|
|
8
|
+
// group message (the sync() generator yields { kind:"message", payload, ... }).
|
|
9
|
+
function groupMessageRaw(payload) {
|
|
10
|
+
return {
|
|
11
|
+
adapter: "wechat-pc",
|
|
12
|
+
kind: "message",
|
|
13
|
+
originalId: "orig-1",
|
|
14
|
+
capturedAt: 1780000000000,
|
|
15
|
+
payload: { kind: "message", ...payload },
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
describe("wechat-pc — group topic naming", () => {
|
|
20
|
+
it("uses the resolved group display name for the topic when groupName is present", () => {
|
|
21
|
+
const adapter = new WeChatPcAdapter();
|
|
22
|
+
const out = adapter.normalize(groupMessageRaw({
|
|
23
|
+
talker: "45498354778@chatroom",
|
|
24
|
+
isGroup: true,
|
|
25
|
+
senderWxid: "wxid_friend",
|
|
26
|
+
groupName: "家庭群",
|
|
27
|
+
text: "晚饭吃什么",
|
|
28
|
+
createdTimeMs: 1780000000000,
|
|
29
|
+
}));
|
|
30
|
+
expect(out.topics).toHaveLength(1);
|
|
31
|
+
// Stable id keyed on the chatroom wxid (identity unchanged)...
|
|
32
|
+
expect(out.topics[0].id).toBe("topic-wechat-group-45498354778@chatroom");
|
|
33
|
+
// ...but the human-readable display name is used, NOT the numeric id.
|
|
34
|
+
expect(out.topics[0].name).toBe("家庭群");
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it("falls back to the raw numeric id when no group name was resolved", () => {
|
|
38
|
+
const adapter = new WeChatPcAdapter();
|
|
39
|
+
const out = adapter.normalize(groupMessageRaw({
|
|
40
|
+
talker: "45498354778@chatroom",
|
|
41
|
+
isGroup: true,
|
|
42
|
+
senderWxid: "wxid_friend",
|
|
43
|
+
groupName: null,
|
|
44
|
+
text: "hi",
|
|
45
|
+
createdTimeMs: 1780000000000,
|
|
46
|
+
}));
|
|
47
|
+
expect(out.topics).toHaveLength(1);
|
|
48
|
+
expect(out.topics[0].name).toBe("45498354778");
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
it("blank/whitespace group name falls back to the raw id (no empty topic name)", () => {
|
|
52
|
+
const adapter = new WeChatPcAdapter();
|
|
53
|
+
const out = adapter.normalize(groupMessageRaw({
|
|
54
|
+
talker: "12345@chatroom",
|
|
55
|
+
isGroup: true,
|
|
56
|
+
senderWxid: "wxid_x",
|
|
57
|
+
groupName: " ",
|
|
58
|
+
text: "hi",
|
|
59
|
+
createdTimeMs: 1780000000000,
|
|
60
|
+
}));
|
|
61
|
+
expect(out.topics[0].name).toBe("12345");
|
|
62
|
+
});
|
|
63
|
+
});
|
|
package/__tests__/analysis-skills.test.js
CHANGED
@@ -498,6 +498,32 @@ describe("InterestsSkill", () => {
|
|
|
498
498
|
expect(r.topTopics[0].name).toBe("Travel");
|
|
499
499
|
expect(r.llmInterests).toBeNull();
|
|
500
500
|
});
|
|
501
|
+
|
|
502
|
+
it("drops unresolved numeric group-id topics (e.g. WeChat chatroom ids) from the profile", async () => {
|
|
503
|
+
// Real interest topic
|
|
504
|
+
rig.vault.putTopic({
|
|
505
|
+
id: "topic-doubao", type: "topic", name: "豆包",
|
|
506
|
+
derivedFromEvents: ["e1"],
|
|
507
|
+
ingestedAt: Date.now(), source: defaultSource("test"),
|
|
508
|
+
});
|
|
509
|
+
// Unresolved group-chat topics named by raw numeric chatroom id — noise.
|
|
510
|
+
rig.vault.putTopic({
|
|
511
|
+
id: "topic-g1", type: "topic", name: "45498354778",
|
|
512
|
+
derivedFromEvents: [],
|
|
513
|
+
ingestedAt: Date.now() + 1, source: defaultSource("test"),
|
|
514
|
+
});
|
|
515
|
+
rig.vault.putTopic({
|
|
516
|
+
id: "topic-g2", type: "topic", name: "54346634535",
|
|
517
|
+
derivedFromEvents: [],
|
|
518
|
+
ingestedAt: Date.now() + 2, source: defaultSource("test"),
|
|
519
|
+
});
|
|
520
|
+
const skill = new InterestsSkill({ vault: rig.vault });
|
|
521
|
+
const r = await skill.run({});
|
|
522
|
+
const names = r.topTopics.map((t) => t.name);
|
|
523
|
+
expect(names).toContain("豆包");
|
|
524
|
+
expect(names).not.toContain("45498354778");
|
|
525
|
+
expect(names).not.toContain("54346634535");
|
|
526
|
+
});
|
|
501
527
|
});
|
|
502
528
|
|
|
503
529
|
// ─── TimelineSkill ──────────────────────────────────────────────────────
|
|
@@ -534,6 +560,34 @@ describe("TimelineSkill", () => {
|
|
|
534
560
|
const r = await skill.run({ since: ts(2026, 4, 1) });
|
|
535
561
|
expect(r.llm_narrative).toBe("你这周点了一次外卖。");
|
|
536
562
|
});
|
|
563
|
+
|
|
564
|
+
it("excludes inventory-snapshot events (installed-app / contact roster) from the narrative", async () => {
|
|
565
|
+
// Real activity event (extra has no `kind` → must be kept)
|
|
566
|
+
makePayment(rig.vault, { id: "act-1", occurredAt: ts(2026, 5, 1), counterpartyName: "美团", amount: 10, adapter: "alipay-bill", title: "外卖" });
|
|
567
|
+
// Inventory-snapshot events stamped at a LATER (collection) time — these
|
|
568
|
+
// would dominate a DESC time query but must be filtered out.
|
|
569
|
+
rig.vault.putEvent({
|
|
570
|
+
id: "event-android-app-com.x", type: "event", subtype: "other",
|
|
571
|
+
occurredAt: ts(2026, 6, 1), actor: "person-self",
|
|
572
|
+
content: { title: "应用:X" },
|
|
573
|
+
ingestedAt: Date.now(), source: defaultSource("system-data-android"),
|
|
574
|
+
extra: { kind: "app-snapshot", packageName: "com.x" },
|
|
575
|
+
});
|
|
576
|
+
rig.vault.putEvent({
|
|
577
|
+
id: "event-android-contact-y", type: "event", subtype: "other",
|
|
578
|
+
occurredAt: ts(2026, 6, 1), actor: "person-self",
|
|
579
|
+
content: { title: "联系人:Y" },
|
|
580
|
+
ingestedAt: Date.now(), source: defaultSource("system-data-android"),
|
|
581
|
+
extra: { kind: "contact-snapshot" },
|
|
582
|
+
});
|
|
583
|
+
const skill = new TimelineSkill({ vault: rig.vault });
|
|
584
|
+
const r = await skill.run({ since: ts(2026, 4, 1) });
|
|
585
|
+
const ids = r.entries.map((e) => e.id);
|
|
586
|
+
expect(ids).toContain("act-1");
|
|
587
|
+
expect(ids).not.toContain("event-android-app-com.x");
|
|
588
|
+
expect(ids).not.toContain("event-android-contact-y");
|
|
589
|
+
expect(r.summary.totalEvents).toBe(1);
|
|
590
|
+
});
|
|
537
591
|
});
|
|
538
592
|
|
|
539
593
|
// ─── runAnalysisSkill dispatcher ─────────────────────────────────────────
|
|
package/lib/adapters/wechat-pc/index.js
CHANGED
@@ -290,6 +290,22 @@ class WeChatPcAdapter {
|
|
|
290
290
|
null;
|
|
291
291
|
const fallbackCapturedAt = Date.now();
|
|
292
292
|
const messages = (result && Array.isArray(result.messages)) ? result.messages : [];
|
|
293
|
+
// Harvest group display names from the contact roster: WeChat stores group
|
|
294
|
+
// chatrooms (wxid ending @chatroom) in contact.db with a nickname/remark.
|
|
295
|
+
// They are skipped as Person entities below, but their names let us label
|
|
296
|
+
// group Topics with a human-readable name instead of the raw numeric id.
|
|
297
|
+
const groupNames = new Map();
|
|
298
|
+
{
|
|
299
|
+
const contactsForNames = (result && Array.isArray(result.contacts)) ? result.contacts : [];
|
|
300
|
+
for (const c of contactsForNames) {
|
|
301
|
+
if (!c || typeof c.wxid !== "string" || !c.wxid.endsWith("@chatroom")) continue;
|
|
302
|
+
const nm =
|
|
303
|
+
(typeof c.remark === "string" && c.remark.trim()) ||
|
|
304
|
+
(typeof c.nickname === "string" && c.nickname.trim()) ||
|
|
305
|
+
"";
|
|
306
|
+
if (nm) groupNames.set(c.wxid, nm);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
293
309
|
let emitted = 0;
|
|
294
310
|
// The sidecar already applied `limit` across all sources (chat/biz/sns/
|
|
295
311
|
// favorite). Yield everything it returned — do NOT re-cap here, or the
|
|
@@ -311,6 +327,7 @@ class WeChatPcAdapter {
|
|
|
311
327
|
text: typeof m.text === "string" ? m.text : "",
|
|
312
328
|
senderWxid: isGroup ? (m.sender || null) : null,
|
|
313
329
|
isGroup,
|
|
330
|
+
groupName: isGroup && conv ? (groupNames.get(conv) || null) : null,
|
|
314
331
|
contentBlob: typeof m.text === "string" ? m.text : null,
|
|
315
332
|
// provenance: chat | biz(公众号) | sns(朋友圈) | favorite(收藏)
|
|
316
333
|
wechatSource: typeof m.source === "string" ? m.source : "chat",
|
|
@@ -423,10 +440,16 @@ function normalizeMessage(p, raw, ingestedAt) {
|
|
|
423
440
|
|
|
424
441
|
const topics = [];
|
|
425
442
|
if (isGroup && p.talker) {
|
|
443
|
+
// Prefer the resolved group display name (harvested from contact.db in
|
|
444
|
+
// sync()); fall back to the raw numeric chatroom id only when unknown.
|
|
445
|
+
const groupName =
|
|
446
|
+
(typeof p.groupName === "string" && p.groupName.trim())
|
|
447
|
+
? p.groupName.trim()
|
|
448
|
+
: p.talker.replace("@chatroom", "");
|
|
426
449
|
topics.push({
|
|
427
450
|
id: `topic-wechat-group-${p.talker}`,
|
|
428
451
|
type: ENTITY_TYPES.TOPIC,
|
|
429
|
-
name:
|
|
452
|
+
name: groupName,
|
|
430
453
|
ingestedAt,
|
|
431
454
|
source,
|
|
432
455
|
extra: { platform: "wechat", source: "pc", wxid: p.talker },
|
|
package/lib/analysis-skills/interests.js
CHANGED
@@ -28,6 +28,21 @@
|
|
|
28
28
|
|
|
29
29
|
const { AnalysisSkill } = require("./base");
|
|
30
30
|
|
|
31
|
+
/**
|
|
32
|
+
* A topic name carries real interest signal only if it is a human-readable
|
|
33
|
+
* label. Unresolved group-chat IDs (e.g. WeChat group topics named by their
|
|
34
|
+
* raw numeric chatroom id "45498354778") and empty names are NOT interests —
|
|
35
|
+
* they would crowd out genuine topics (coffee, photography, 豆包...) in the
|
|
36
|
+
* profile. Drop them.
|
|
37
|
+
*/
|
|
38
|
+
function isMeaningfulTopicName(name) {
|
|
39
|
+
if (typeof name !== "string") return false;
|
|
40
|
+
const s = name.trim();
|
|
41
|
+
if (s.length === 0) return false;
|
|
42
|
+
if (/^\d+$/.test(s)) return false; // pure-numeric = unresolved group id
|
|
43
|
+
return true;
|
|
44
|
+
}
|
|
45
|
+
|
|
31
46
|
class InterestsSkill extends AnalysisSkill {
|
|
32
47
|
constructor(opts) {
|
|
33
48
|
super({ ...opts, name: "analysis.interests" });
|
|
@@ -61,25 +76,30 @@ class InterestsSkill extends AnalysisSkill {
|
|
|
61
76
|
let topics = [];
|
|
62
77
|
try {
|
|
63
78
|
const db = this.vault._requireOpen();
|
|
79
|
+
// Over-fetch (×20, capped) before filtering: vaults can hold thousands
|
|
80
|
+
// of unresolved numeric group-chat topics that would otherwise starve
|
|
81
|
+
// the few human-readable interest topics out of the top-N budget.
|
|
64
82
|
topics = db.prepare(
|
|
65
83
|
"SELECT id, name, derived_from_events, ingested_at FROM topics ORDER BY ingested_at DESC LIMIT ?"
|
|
66
|
-
).all(topN *
|
|
84
|
+
).all(Math.min(topN * 20, 2000));
|
|
67
85
|
} catch (_e) {
|
|
68
86
|
// Older vaults may not have topics; non-fatal.
|
|
69
87
|
}
|
|
70
|
-
const mapped = topics
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
88
|
+
const mapped = topics
|
|
89
|
+
.filter((t) => isMeaningfulTopicName(t.name))
|
|
90
|
+
.map((t) => {
|
|
91
|
+
let eventCount = 0;
|
|
92
|
+
try {
|
|
93
|
+
const arr = t.derived_from_events ? JSON.parse(t.derived_from_events) : [];
|
|
94
|
+
if (Array.isArray(arr)) eventCount = arr.length;
|
|
95
|
+
} catch (_e) {}
|
|
96
|
+
return {
|
|
97
|
+
id: t.id,
|
|
98
|
+
name: t.name,
|
|
99
|
+
eventCount,
|
|
100
|
+
lastSeen: t.ingested_at || null,
|
|
101
|
+
};
|
|
102
|
+
});
|
|
83
103
|
return mapped
|
|
84
104
|
.sort((a, b) => (b.eventCount - a.eventCount) || ((b.lastSeen || 0) - (a.lastSeen || 0)))
|
|
85
105
|
.slice(0, topN);
|
|
package/lib/analysis-skills/timeline.js
CHANGED
@@ -63,7 +63,12 @@ class TimelineSkill extends AnalysisSkill {
|
|
|
63
63
|
}
|
|
64
64
|
|
|
65
65
|
_fetchEvents({ since, until }, limit) {
|
|
66
|
-
|
|
66
|
+
// Exclude inventory-snapshot events (installed-app roster + contact
|
|
67
|
+
// roster from system-data-android). They carry a synthetic
|
|
68
|
+
// collection-time occurredAt — tens of thousands of them cluster at one
|
|
69
|
+
// recent timestamp and would otherwise crowd out real activity from this
|
|
70
|
+
// chronological narrative. They remain in the vault for facet counts.
|
|
71
|
+
const q = { limit, excludeExtraKinds: ["app-snapshot", "contact-snapshot"] };
|
|
67
72
|
if (since != null) q.since = since;
|
|
68
73
|
if (until != null) q.until = until;
|
|
69
74
|
const events = this.vault.queryEvents(q) || [];
|
package/lib/vault.js
CHANGED
|
@@ -811,6 +811,18 @@ class LocalVault {
|
|
|
811
811
|
where.push("source_adapter = @adapter");
|
|
812
812
|
params.adapter = q.adapter;
|
|
813
813
|
}
|
|
814
|
+
if (Array.isArray(q.excludeExtraKinds) && q.excludeExtraKinds.length > 0) {
|
|
815
|
+
// Exclude inventory-snapshot events (e.g. installed-app / contact-roster
|
|
816
|
+
// facet events) whose extra.kind is in the given list. Those carry a
|
|
817
|
+
// synthetic collection-time occurredAt and would otherwise dominate any
|
|
818
|
+
// time-ordered (occurred_at DESC) query. Rows with no extra.kind are kept.
|
|
819
|
+
const placeholders = q.excludeExtraKinds.map((_v, i) => `@xk${i}`);
|
|
820
|
+
q.excludeExtraKinds.forEach((v, i) => { params[`xk${i}`] = v; });
|
|
821
|
+
where.push(
|
|
822
|
+
"(json_extract(extra, '$.kind') IS NULL OR json_extract(extra, '$.kind') NOT IN (" +
|
|
823
|
+
placeholders.join(", ") + "))",
|
|
824
|
+
);
|
|
825
|
+
}
|
|
814
826
|
|
|
815
827
|
const limit = Number.isInteger(q.limit) && q.limit > 0 ? Math.min(q.limit, 10000) : 100;
|
|
816
828
|
const offset = Number.isInteger(q.offset) && q.offset >= 0 ? q.offset : 0;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.4.27",
|
|
3
|
+
"version": "0.4.28",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|