@chainlesschain/personal-data-hub 0.4.25 → 0.4.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/analysis-skills.test.js +71 -2
- package/__tests__/analysis.test.js +46 -0
- package/__tests__/salvage-ingest.test.js +97 -0
- package/__tests__/social-douyin-im-direct-read.test.js +69 -3
- package/__tests__/social-douyin-salvage-collector.test.js +98 -0
- package/__tests__/social-douyin-salvage-mapper.test.js +90 -0
- package/__tests__/social-weibo-sqlite-device.test.js +174 -0
- package/__tests__/sqlite-leaf-salvage.test.js +97 -0
- package/lib/adapters/social-douyin/index.js +56 -2
- package/lib/adapters/social-douyin-adb/collector.js +100 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +85 -0
- package/lib/adapters/social-douyin-adb/index.js +5 -0
- package/lib/adapters/social-douyin-adb/salvage-mapper.js +119 -0
- package/lib/adapters/social-weibo/index.js +110 -30
- package/lib/analysis-skills/index.js +3 -0
- package/lib/analysis-skills/overview.js +157 -0
- package/lib/analysis.js +50 -0
- package/lib/forensics/leaf-salvage.js +185 -0
- package/lib/forensics/salvage-ingest.js +160 -0
- package/lib/prompt-builder.js +9 -0
- package/package.json +4 -2
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/*
|
|
3
|
+
* Glue: leaf-salvaged records → parseImDb-shaped output.
|
|
4
|
+
*
|
|
5
|
+
* The leaf-page salvager (scripts/android/pdh-sqlite-leaf-salvage.js) emits raw
|
|
6
|
+
* positional tuples {rowid, cols:[...]} (leaf pages carry no column names). This
|
|
7
|
+
* maps them into the SAME shape `parseImDb` returns ({messages, contacts,
|
|
8
|
+
* conversations}) so the existing DouyinAdapter.normalize path ingests them
|
|
9
|
+
* unchanged — closing the loop: Method-B dump → salvage → THIS → PDH entities.
|
|
10
|
+
*
|
|
11
|
+
* Column order comes from the table's CREATE TABLE (see docs/internal/
|
|
12
|
+
* pdh-app-db-schemas.md or grep the dump). Pass it explicitly for correctness;
|
|
13
|
+
* `inferMsgColumns` offers a heuristic fallback (content=JSON/longest text,
|
|
14
|
+
* created_time=epoch int) when the exact order is unknown.
|
|
15
|
+
*/
|
|
16
|
+
const { _internals } = require("./im-db-parser");
|
|
17
|
+
const { extractTextFromContent, normalizeEpochMs } = _internals;
|
|
18
|
+
|
|
19
|
+
/**
 * Pair the positional values of one salvaged record with column names.
 *
 * One key is produced per entry of `names`; when `cols` is shorter than
 * `names` the surplus keys are present with the value `undefined`. A missing
 * (`null`/`undefined`) `names` or `cols` is treated as an empty list instead
 * of throwing, so a table passed without its column order maps to rows with
 * no fields rather than crashing the whole mapping run.
 *
 * @param {Array<*>} cols - positional values of a single record.
 * @param {string[]} names - column names, in the table's column order.
 * @returns {Object<string, *>} lookup from column name to value.
 */
function zip(cols, names) {
  const values = cols == null ? [] : cols;
  const keys = names == null ? [] : names;
  const o = {};
  // Iterate over the names, not the values: extra values without a name are dropped.
  for (let i = 0; i < keys.length; i++) o[keys[i]] = values[i];
  return o;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Turn salvaged `msg`-table records into message objects.
 *
 * Records that are falsy or whose `cols` is not an array are ignored, as are
 * rows where both `content` and `created_time` are null/undefined.
 *
 * @param {Iterable<{cols: Array<*>}>} records - salvaged records; a falsy value means "none".
 * @param {string[]} columns - column names in table order, used to label `cols`.
 * @returns {Array<Object>} objects with senderUid, conversationId,
 *   createdTimeMs, text, readStatus and contentBlob.
 */
function mapMsgRecords(records, columns) {
  const messages = [];
  for (const record of records || []) {
    const usable = Boolean(record) && Array.isArray(record.cols);
    if (!usable) continue;

    const fields = zip(record.cols, columns);
    const { content, created_time: rawTime } = fields;
    if (content == null && rawTime == null) continue;

    // Non-numeric timestamps are coerced; anything that is not finite becomes 0.
    const numericTime = Number(rawTime);
    const epochInput = Number.isFinite(numericTime) ? numericTime : 0;

    messages.push({
      senderUid: fields.sender == null ? null : String(fields.sender),
      conversationId: fields.conversation_id == null ? null : String(fields.conversation_id),
      createdTimeMs: normalizeEpochMs(epochInput),
      text: extractTextFromContent(content),
      readStatus: typeof fields.read_status === "number" ? fields.read_status : null,
      // Only string content is kept verbatim; other value types yield null.
      contentBlob: typeof content === "string" ? content : null,
    });
  }
  return messages;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Build a de-duplicated contact list from salvaged participant records.
 *
 * Only the `user_id` column is read. Records without a usable `cols` array,
 * without a `user_id`, or with an empty-string id are skipped; the first
 * occurrence of each uid wins and output keeps first-seen order.
 *
 * @param {Iterable<{cols: Array<*>}>} records - salvaged records; a falsy value means "none".
 * @param {string[]} columns - column names in table order.
 * @returns {Array<Object>} contact stubs carrying only `uid` (other profile
 *   fields are null) and `fromParticipant: true`.
 */
function mapParticipantRecords(records, columns) {
  const contactsByUid = new Map();
  for (const record of records || []) {
    if (!(record && Array.isArray(record.cols))) continue;

    const { user_id: rawUid } = zip(record.cols, columns);
    if (rawUid == null) continue;

    const uid = String(rawUid);
    if (uid === "" || contactsByUid.has(uid)) continue;

    contactsByUid.set(uid, {
      uid,
      shortId: null,
      name: null,
      avatarUrl: null,
      followStatus: null,
      fromParticipant: true,
    });
  }
  // Map iteration follows insertion order, i.e. first appearance of each uid.
  return [...contactsByUid.values()];
}
|
|
59
|
+
|
|
60
|
+
/**
 * Turn salvaged conversation-table records into conversation objects.
 *
 * Records without a usable `cols` array or without a `conversation_id` are
 * skipped. `type` and `stranger` are only honoured when stored as numbers;
 * otherwise the corresponding output field is null.
 *
 * @param {Iterable<{cols: Array<*>}>} records - salvaged records; a falsy value means "none".
 * @param {string[]} columns - column names in table order.
 * @returns {Array<Object>} objects with conversationId, conversationType,
 *   lastMsgTimeMs and stranger.
 */
function mapConversationRecords(records, columns) {
  const conversations = [];
  for (const record of records || []) {
    if (!(record && Array.isArray(record.cols))) continue;

    const fields = zip(record.cols, columns);
    if (fields.conversation_id == null) continue;

    const hasNumericType = typeof fields.type === "number";
    const hasNumericStranger = typeof fields.stranger === "number";

    conversations.push({
      conversationId: String(fields.conversation_id),
      conversationType: hasNumericType ? fields.type : null,
      // NaN and 0 both collapse to 0 before normalisation.
      lastMsgTimeMs: normalizeEpochMs(Number(fields.last_msg_create_time) || 0),
      stranger: hasNumericStranger ? fields.stranger === 1 : null,
    });
  }
  return conversations;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Heuristic fallback for when the exact `msg` column order is unknown: guess
 * which positions hold `content` and `created_time` from the value shapes of
 * the first record that has at least three columns.
 *
 * - content: the string slot with the best score, where a value that
 *   starts with "{" after trimming (JSON-ish) outranks any plain string and
 *   length breaks ties.
 * - created_time: the largest value that is a safe integer greater than 1e9.
 *   Requiring a safe integer rejects Infinity, fractional/garbage reals and
 *   integers beyond 2^53, none of which can be a second, millisecond or
 *   microsecond epoch, so such values no longer steal the slot.
 *   NOTE(review): numeric ids that themselves fall in the 1e9..2^53 range can
 *   still be mistaken for a timestamp; pass explicit columns when possible.
 *
 * @param {Array<{cols: Array<*>}>} records - salvaged records; a falsy value means "none".
 * @returns {string[]} column names usable with mapMsgRecords; unidentified
 *   slots are named c0, c1, ...; an empty array when no record qualifies.
 */
function inferMsgColumns(records) {
  const sample = (records || []).find((r) => r && Array.isArray(r.cols) && r.cols.length >= 3);
  if (!sample) return [];

  const cols = sample.cols;
  const names = cols.map((_, i) => `c${i}`);
  let contentIdx = -1;
  let contentScore = -1;
  let timeIdx = -1;
  let timeVal = -1;

  for (let i = 0; i < cols.length; i++) {
    const v = cols[i];
    if (typeof v === "string") {
      const score = (v.trim().startsWith("{") ? 1e6 : 0) + v.length;
      if (score > contentScore) {
        contentScore = score;
        contentIdx = i;
      }
    } else if (Number.isSafeInteger(v) && v > 1e9 && v > timeVal) {
      // largest epoch-ish int → created_time (ms/sec/us all > 1e9)
      timeVal = v;
      timeIdx = i;
    }
  }

  // A slot is either a string or a number, so the two indices never collide.
  if (contentIdx >= 0) names[contentIdx] = "content";
  if (timeIdx >= 0) names[timeIdx] = "created_time";
  return names;
}
|
|
103
|
+
|
|
104
|
+
/**
 * One-shot conversion of salvaged tables into the
 * `{ messages, contacts, conversations }` shape.
 *
 * Each argument property is optional; a table that is absent (falsy)
 * contributes an empty array.
 *
 * @param {Object} [tables]
 * @param {{records: *, columns: string[]}} [tables.msg] - message table input.
 * @param {{records: *, columns: string[]}} [tables.participant] - participant table input.
 * @param {{records: *, columns: string[]}} [tables.conversation] - conversation table input.
 * @returns {{messages: Array, contacts: Array, conversations: Array}}
 */
function mapSalvaged({ msg, participant, conversation } = {}) {
  const messages = msg ? mapMsgRecords(msg.records, msg.columns) : [];
  const contacts = participant
    ? mapParticipantRecords(participant.records, participant.columns)
    : [];
  const conversations = conversation
    ? mapConversationRecords(conversation.records, conversation.columns)
    : [];
  return { messages, contacts, conversations };
}
|
|
112
|
+
|
|
113
|
+
module.exports = {
|
|
114
|
+
mapMsgRecords,
|
|
115
|
+
mapParticipantRecords,
|
|
116
|
+
mapConversationRecords,
|
|
117
|
+
inferMsgColumns,
|
|
118
|
+
mapSalvaged,
|
|
119
|
+
};
|
|
@@ -9,10 +9,23 @@
|
|
|
9
9
|
* — account.uid is OPTIONAL at construction (the snapshot file carries
|
|
10
10
|
* account in payload).
|
|
11
11
|
*
|
|
12
|
-
* 2. sqlite mode (opts.dbPath
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
12
|
+
* 2. sqlite mode (opts.dbPath): desktop device-pull path — reads the Weibo
|
|
13
|
+
* Android app's plain SQLite DB `com.sina.weibo/databases/sina_weibo`.
|
|
14
|
+
* account.uid REQUIRED in this mode.
|
|
15
|
+
*
|
|
16
|
+
* Table/column names are DEVICE-VERIFIED against a real install
|
|
17
|
+
* (Redmi M2104K10AC, 微博 16.5.3, 2026-06-16):
|
|
18
|
+
* - posts → `home_table` (timeline cache; own posts = uid==selfUid)
|
|
19
|
+
* cols: mblogid / uid / content / time / rtnum /
|
|
20
|
+
* commentnum / attitudenum / src / longitude / latitude
|
|
21
|
+
* - favourites → `like_table` cols: mblogid / content / time / nick
|
|
22
|
+
* - follows → `follower_table` (following=1 ⇒ accounts the user
|
|
23
|
+
* follows) cols: user_id / screen_name / remark / gender
|
|
24
|
+
* The legacy `post`/`status`/`search_history` queries are kept as
|
|
25
|
+
* FALLBACKS (older builds) — on a modern device those tables don't
|
|
26
|
+
* exist so the adapter previously collected ZERO. Row VALUES were not
|
|
27
|
+
* validated (verification account was empty); column semantics use the
|
|
28
|
+
* standard Weibo schema. See memory pdh_collector_completeness_audit.
|
|
16
29
|
*
|
|
17
30
|
* Snapshot schema (mirrors WeiboLocalCollector.SNAPSHOT_SCHEMA_VERSION):
|
|
18
31
|
*
|
|
@@ -44,7 +57,7 @@ const {
|
|
|
44
57
|
} = require("../../constants");
|
|
45
58
|
|
|
46
59
|
const NAME = "social-weibo";
|
|
47
|
-
const VERSION = "0.
|
|
60
|
+
const VERSION = "0.7.0";
|
|
48
61
|
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
49
62
|
|
|
50
63
|
const KIND_POST = "post";
|
|
@@ -246,21 +259,64 @@ class WeiboAdapter {
|
|
|
246
259
|
? this._deps.dbDriverFactory()
|
|
247
260
|
: require("better-sqlite3-multiple-ciphers");
|
|
248
261
|
const db = new Driver(dbPath, { readonly: true });
|
|
262
|
+
// selfUid sanitised to digits — interpolated into a WHERE clause and
|
|
263
|
+
// sourced from wiring config (numeric uin). Defensive against injection.
|
|
264
|
+
const selfUid = String(this.account.uid).replace(/[^0-9]/g, "");
|
|
249
265
|
|
|
250
266
|
try {
|
|
267
|
+
// POSTS — device-verified `home_table` (own posts = uid==selfUid);
|
|
268
|
+
// legacy `post`/`status` kept as fallback for older builds.
|
|
251
269
|
const posts =
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
270
|
+
(selfUid &&
|
|
271
|
+
trySelect(
|
|
272
|
+
db,
|
|
273
|
+
`SELECT * FROM home_table WHERE uid='${selfUid}' ORDER BY time DESC LIMIT 5000`,
|
|
274
|
+
)) ||
|
|
275
|
+
trySelect(db, "SELECT * FROM post ORDER BY created_at DESC LIMIT 5000") ||
|
|
276
|
+
trySelect(db, "SELECT * FROM status ORDER BY created_at DESC LIMIT 5000") ||
|
|
277
|
+
[];
|
|
255
278
|
for (const row of posts) {
|
|
256
279
|
yield {
|
|
257
280
|
adapter: NAME,
|
|
258
|
-
originalId: `post-${row.id || row.mid || row.idstr}`,
|
|
259
|
-
capturedAt: parseTime(row.
|
|
281
|
+
originalId: `post-${row.mblogid || row.id || row.mid || row.idstr}`,
|
|
282
|
+
capturedAt: parseTime(row.time || row.created_at),
|
|
260
283
|
payload: { row, kind: KIND_POST },
|
|
261
284
|
};
|
|
262
285
|
}
|
|
263
286
|
|
|
287
|
+
// FAVOURITES — device-verified `like_table` (the account's likes).
|
|
288
|
+
// Legacy sqlite had no favourite path (folded into posts pre-A8).
|
|
289
|
+
const favourites =
|
|
290
|
+
trySelect(db, "SELECT * FROM like_table ORDER BY time DESC LIMIT 5000") || [];
|
|
291
|
+
for (const row of favourites) {
|
|
292
|
+
yield {
|
|
293
|
+
adapter: NAME,
|
|
294
|
+
originalId: `fav-${row.mblogid || row.id}`,
|
|
295
|
+
capturedAt: parseTime(row.time),
|
|
296
|
+
payload: { row, kind: KIND_FAVOURITE },
|
|
297
|
+
};
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
// FOLLOWS — device-verified `follower_table`; following=1 ⇒ accounts
|
|
301
|
+
// the user follows (vs followers). Fallback to the whole table.
|
|
302
|
+
const follows =
|
|
303
|
+
trySelect(
|
|
304
|
+
db,
|
|
305
|
+
"SELECT * FROM follower_table WHERE following=1 ORDER BY user_id LIMIT 5000",
|
|
306
|
+
) ||
|
|
307
|
+
trySelect(db, "SELECT * FROM follower_table LIMIT 5000") ||
|
|
308
|
+
[];
|
|
309
|
+
for (const row of follows) {
|
|
310
|
+
yield {
|
|
311
|
+
adapter: NAME,
|
|
312
|
+
originalId: `follow-${row.user_id || row.id}`,
|
|
313
|
+
capturedAt: parseTime(row.time) || Date.now(),
|
|
314
|
+
payload: { row, kind: KIND_FOLLOW },
|
|
315
|
+
};
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
// SEARCH — legacy only (`search_history` doesn't exist on modern
|
|
319
|
+
// weibo; trySelect returns null gracefully, loop is skipped).
|
|
264
320
|
const searches =
|
|
265
321
|
trySelect(db, "SELECT * FROM search_history ORDER BY time DESC LIMIT 5000")
|
|
266
322
|
|| [];
|
|
@@ -342,8 +398,9 @@ function normalizePost(p, raw, ingestedAt) {
|
|
|
342
398
|
// Sqlite mode: { kind:"post", row: { text, mid, ... } }
|
|
343
399
|
const row = p.row || p;
|
|
344
400
|
const isSnapshot = !p.row;
|
|
345
|
-
|
|
346
|
-
const
|
|
401
|
+
// home_table (device-verified) stores body in `content`, id in `mblogid`.
|
|
402
|
+
const text = row.text || row.content || "";
|
|
403
|
+
const mid = row.mid || row.mblogid || row.id || row.idstr || null;
|
|
347
404
|
const occurredAt =
|
|
348
405
|
parseTime(row.created_at || row.createdAt || row.time || raw.capturedAt) ||
|
|
349
406
|
ingestedAt;
|
|
@@ -369,13 +426,13 @@ function normalizePost(p, raw, ingestedAt) {
|
|
|
369
426
|
weiboMid: mid,
|
|
370
427
|
repostsCount:
|
|
371
428
|
row.repostsCount != null ? row.repostsCount
|
|
372
|
-
: row.reposts_count || row.repost || 0,
|
|
429
|
+
: row.reposts_count || row.repost || row.rtnum || 0,
|
|
373
430
|
commentsCount:
|
|
374
431
|
row.commentsCount != null ? row.commentsCount
|
|
375
|
-
: row.comments_count || row.comments || 0,
|
|
432
|
+
: row.comments_count || row.comments || row.commentnum || 0,
|
|
376
433
|
likesCount:
|
|
377
434
|
row.likesCount != null ? row.likesCount
|
|
378
|
-
: row.attitudes_count || row.likes || 0,
|
|
435
|
+
: row.attitudes_count || row.likes || row.attitudenum || 0,
|
|
379
436
|
picCount: row.picCount || row.pic_num || 0,
|
|
380
437
|
source: row.source || null,
|
|
381
438
|
location: row.location || row.geo || null,
|
|
@@ -387,13 +444,21 @@ function normalizePost(p, raw, ingestedAt) {
|
|
|
387
444
|
}
|
|
388
445
|
|
|
389
446
|
function normalizeFavourite(p, raw, ingestedAt) {
|
|
390
|
-
// Snapshot
|
|
391
|
-
//
|
|
392
|
-
//
|
|
393
|
-
const
|
|
394
|
-
const
|
|
395
|
-
const
|
|
396
|
-
const
|
|
447
|
+
// Snapshot: { kind:"favourite", mid, text, capturedAt, authorScreenName }
|
|
448
|
+
// Sqlite (device-verified `like_table`): { row: { mblogid, content, time,
|
|
449
|
+
// nick } }. Both shapes handled below.
|
|
450
|
+
const row = p.row || null;
|
|
451
|
+
const isSqlite = !!row;
|
|
452
|
+
const text = isSqlite ? (row.content || "") : (p.text || "");
|
|
453
|
+
const mid = isSqlite ? (row.mblogid || row.id || null) : (p.mid || null);
|
|
454
|
+
const occurredAt = isSqlite
|
|
455
|
+
? (parseTime(row.time) || raw.capturedAt || ingestedAt)
|
|
456
|
+
: (parseTime(p.capturedAt) || raw.capturedAt || ingestedAt);
|
|
457
|
+
const source = buildSource(
|
|
458
|
+
raw,
|
|
459
|
+
occurredAt,
|
|
460
|
+
isSqlite ? CAPTURED_BY.SQLITE : CAPTURED_BY.API,
|
|
461
|
+
);
|
|
397
462
|
return {
|
|
398
463
|
events: [{
|
|
399
464
|
id: newId(),
|
|
@@ -410,7 +475,9 @@ function normalizeFavourite(p, raw, ingestedAt) {
|
|
|
410
475
|
extra: {
|
|
411
476
|
platform: "weibo",
|
|
412
477
|
weiboMid: mid,
|
|
413
|
-
authorScreenName:
|
|
478
|
+
authorScreenName: isSqlite
|
|
479
|
+
? (row.nick || null)
|
|
480
|
+
: (p.authorScreenName || null),
|
|
414
481
|
},
|
|
415
482
|
}],
|
|
416
483
|
persons: [], places: [], items: [], topics: [],
|
|
@@ -418,15 +485,28 @@ function normalizeFavourite(p, raw, ingestedAt) {
|
|
|
418
485
|
}
|
|
419
486
|
|
|
420
487
|
function normalizeFollow(p, raw, ingestedAt) {
|
|
421
|
-
// Snapshot
|
|
422
|
-
//
|
|
488
|
+
// Snapshot: { kind:"follow", uid, screenName, description, avatarUrl,
|
|
489
|
+
// capturedAt }
|
|
490
|
+
// Sqlite (device-verified `follower_table`): { row: { user_id|id,
|
|
491
|
+
// screen_name, remark, gender } }. Both shapes handled below.
|
|
492
|
+
const row = p.row || null;
|
|
493
|
+
const isSqlite = !!row;
|
|
494
|
+
const rawUid = isSqlite ? (row.user_id || row.id) : p.uid;
|
|
423
495
|
const followUid =
|
|
424
|
-
(typeof
|
|
425
|
-
(typeof
|
|
496
|
+
(typeof rawUid === "number" && rawUid) ||
|
|
497
|
+
(typeof rawUid === "string" && rawUid.length > 0 && rawUid) ||
|
|
426
498
|
`unknown-${newId()}`;
|
|
427
|
-
const screenName =
|
|
428
|
-
|
|
429
|
-
|
|
499
|
+
const screenName = isSqlite
|
|
500
|
+
? (row.screen_name || row.remark || "(unnamed)")
|
|
501
|
+
: (p.screenName || "(unnamed)");
|
|
502
|
+
const occurredAt = isSqlite
|
|
503
|
+
? (parseTime(row.time) || raw.capturedAt || ingestedAt)
|
|
504
|
+
: (parseTime(p.capturedAt) || raw.capturedAt || ingestedAt);
|
|
505
|
+
const source = buildSource(
|
|
506
|
+
raw,
|
|
507
|
+
occurredAt,
|
|
508
|
+
isSqlite ? CAPTURED_BY.SQLITE : CAPTURED_BY.API,
|
|
509
|
+
);
|
|
430
510
|
const person = {
|
|
431
511
|
id: `person-weibo-${followUid}`,
|
|
432
512
|
type: ENTITY_TYPES.PERSON,
|
|
@@ -14,6 +14,7 @@ const { RelationsSkill } = require("./relations");
|
|
|
14
14
|
const { FootprintSkill } = require("./footprint");
|
|
15
15
|
const { InterestsSkill } = require("./interests");
|
|
16
16
|
const { TimelineSkill } = require("./timeline");
|
|
17
|
+
const { OverviewSkill } = require("./overview");
|
|
17
18
|
|
|
18
19
|
const SKILL_REGISTRY = Object.freeze({
|
|
19
20
|
"analysis.spending": SpendingSkill,
|
|
@@ -21,6 +22,7 @@ const SKILL_REGISTRY = Object.freeze({
|
|
|
21
22
|
"analysis.footprint": FootprintSkill,
|
|
22
23
|
"analysis.interests": InterestsSkill,
|
|
23
24
|
"analysis.timeline": TimelineSkill,
|
|
25
|
+
"analysis.overview": OverviewSkill,
|
|
24
26
|
});
|
|
25
27
|
|
|
26
28
|
const SKILL_NAMES = Object.freeze(Object.keys(SKILL_REGISTRY));
|
|
@@ -50,6 +52,7 @@ module.exports = {
|
|
|
50
52
|
FootprintSkill,
|
|
51
53
|
InterestsSkill,
|
|
52
54
|
TimelineSkill,
|
|
55
|
+
OverviewSkill,
|
|
53
56
|
SKILL_REGISTRY,
|
|
54
57
|
SKILL_NAMES,
|
|
55
58
|
ANALYSIS_SKILL_NAMES: SKILL_NAMES,
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* analysis.overview — cross-app unified snapshot for decision support.
|
|
3
|
+
*
|
|
4
|
+
* The de-silo capstone: every adapter normalizes into one vault, so this single
|
|
5
|
+
* skill aggregates ALL apps' Events into one picture — activity by app + by type,
|
|
6
|
+
* top relationships (merge-group aware, so the same person across WeChat/Douyin/
|
|
7
|
+
* Weibo counts once), spending across shopping/finance apps, and time trend.
|
|
8
|
+
* Gives the personal AI a unified "基于跨 app 数据" basis for decisions.
|
|
9
|
+
*
|
|
10
|
+
* Output:
|
|
11
|
+
* {
|
|
12
|
+
* skill, summary: { totalEvents, appsActive, period, topAppName },
|
|
13
|
+
* byApp: [{ app, count }], byType: [{ type, count }],
|
|
14
|
+
* monthlyActivity: [{ monthKey, count }],
|
|
15
|
+
* topContacts: [{ personId, name, interactions, byApp }],
|
|
16
|
+
* spending: { total, byDirection, currency },
|
|
17
|
+
* citations, llm_commentary,
|
|
18
|
+
* }
|
|
19
|
+
*/
|
|
20
|
+
"use strict";
|
|
21
|
+
|
|
22
|
+
const { AnalysisSkill } = require("./base");
|
|
23
|
+
|
|
24
|
+
// Event subtypes whose `content.amount` is added to the overview's spending
// aggregate; events of any other subtype never contribute an amount.
const SPEND_SUBTYPES = new Set([
  "payment",
  "transfer",
  "refund",
  "utility",
  "redenvelope",
  "investment",
  "income",
  "order",
]);
|
|
27
|
+
|
|
28
|
+
/**
 * Cross-app overview skill: aggregates the events returned by the vault into
 * per-app, per-type and per-month counts, a ranked contact list and a
 * spending summary, optionally followed by an LLM commentary.
 */
class OverviewSkill extends AnalysisSkill {
  constructor(opts) {
    super({ ...opts, name: "analysis.overview" });
  }

  /**
   * Run the aggregation.
   *
   * @param {Object} [options]
   * @param {number} [options.topN=10] - max contacts returned; non-finite or
   *   non-positive values fall back to 10.
   * @param {number} [options.limit=50000] - max events requested from the vault.
   * @param {boolean} [options.commentary] - pass `false` to skip the LLM call.
   * @param {*} [options.acceptNonLocal] - forwarded to the commentary call.
   *   The time window comes from `resolveTimeWindow(options)` (inherited).
   * @returns {Promise<Object>} `{ skill, summary, byApp, byType,
   *   monthlyActivity, topContacts, spending, citations, llm_commentary }`.
   */
  async run(options = {}) {
    const { since, until } = this.resolveTimeWindow(options);
    const topN = Number.isFinite(options.topN) && options.topN > 0 ? options.topN : 10;

    const q = { limit: Number.isFinite(options.limit) ? options.limit : 50_000 };
    if (since != null) q.since = since;
    if (until != null) q.until = until;
    const events = this.vault.queryEvents(q) || [];

    const byApp = new Map();
    const byType = new Map();
    const byMonth = new Map();
    const contacts = new Map(); // canonicalPersonId → { interactions, byApp:Map }
    let spendTotal = 0;
    const spendByDir = new Map();
    let currency = null;
    const citations = [];

    // The same person id shows up in many events; resolve each merge group
    // once per run instead of once per participant per event.
    const canonCache = new Map();
    const canonOf = (pid) => {
      if (!canonCache.has(pid)) canonCache.set(pid, this._canon(pid));
      return canonCache.get(pid);
    };

    for (const e of events) {
      const app = (e.source && e.source.adapter) || "unknown";
      byApp.set(app, (byApp.get(app) || 0) + 1);
      const type = e.subtype || "other";
      byType.set(type, (byType.get(type) || 0) + 1);

      if (Number.isFinite(e.occurredAt)) {
        const d = new Date(e.occurredAt);
        if (Number.isFinite(d.getTime())) {
          // Month key is derived with local-time getters (getFullYear/getMonth).
          const m = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}`;
          byMonth.set(m, (byMonth.get(m) || 0) + 1);
        }
      }

      // relationships (actor + participants), merge-group canonicalized
      // NOTE(review): an actor that is also listed in participants is counted
      // twice for the same event — confirm whether that is intended.
      const ids = (Array.isArray(e.participants) ? e.participants : []).concat(e.actor ? [e.actor] : []);
      for (const pid of ids) {
        if (!pid || pid === "person-self") continue;
        const canon = canonOf(pid);
        const cur = contacts.get(canon) || { interactions: 0, byApp: new Map() };
        cur.interactions += 1;
        cur.byApp.set(app, (cur.byApp.get(app) || 0) + 1);
        contacts.set(canon, cur);
      }

      // spending
      // NOTE(review): amounts of every direction (and currency) are summed
      // into one total; the reported currency is the first one encountered.
      const amount = e.content && e.content.amount;
      if (SPEND_SUBTYPES.has(type) && amount && Number.isFinite(amount.value)) {
        const v = amount.value;
        spendTotal += v;
        const dir = amount.direction || "unknown";
        spendByDir.set(dir, (spendByDir.get(dir) || 0) + v);
        if (!currency && amount.currency) currency = amount.currency;
      }

      // Only the first 50 event ids are kept as citations.
      if (citations.length < 50) citations.push(e.id);
    }

    const byAppArr = [...byApp.entries()].map(([app, count]) => ({ app, count })).sort((a, b) => b.count - a.count);

    // Rank first, then look names up only for the contacts that are returned;
    // the sort is stable and does not depend on the name, so the output is
    // the same as looking every contact up beforehand.
    const topContacts = [...contacts.entries()]
      .sort(([, a], [, b]) => b.interactions - a.interactions)
      .slice(0, topN)
      .map(([personId, v]) => ({
        personId,
        name: this._lookupName(personId),
        interactions: v.interactions,
        byApp: Object.fromEntries(v.byApp),
      }));

    const summary = {
      totalEvents: events.length,
      appsActive: byApp.size,
      // `??` keeps a legitimate bound of 0 instead of reporting it as null.
      period: { since: since ?? null, until: until ?? null },
      topAppName: byAppArr.length ? byAppArr[0].app : null,
    };

    const result = {
      skill: "analysis.overview",
      summary,
      byApp: byAppArr,
      byType: [...byType.entries()].map(([type, count]) => ({ type, count })).sort((a, b) => b.count - a.count),
      monthlyActivity: [...byMonth.entries()].map(([monthKey, count]) => ({ monthKey, count })).sort((a, b) => a.monthKey.localeCompare(b.monthKey)),
      topContacts,
      spending: {
        total: Math.round(spendTotal * 100) / 100,
        byDirection: Object.fromEntries(spendByDir),
        currency: currency || null,
      },
      citations,
      llm_commentary: null,
    };

    if (options.commentary !== false && this.llm && events.length > 0) {
      result.llm_commentary = await this._commentary(result, options);
    }
    return result;
  }

  /**
   * Map a person id to the canonical id of its merge group.
   *
   * @param {string} personId
   * @returns {string} the smallest id (default sort order) among the group
   *   members, or `personId` itself when the group is empty or missing.
   */
  _canon(personId) {
    const members = this.expandToMergeGroup(personId);
    if (!members || members.length === 0) return personId;
    // canonical = smallest id (stable across the group)
    return [...members].sort()[0];
  }

  /**
   * Best-effort display name for a person.
   *
   * @param {string} personId
   * @returns {?string} first entry of the person's `names`, or null when the
   *   vault has no `getPerson`, the person has no names, or the lookup throws.
   */
  _lookupName(personId) {
    try {
      if (typeof this.vault.getPerson === "function") {
        const p = this.vault.getPerson(personId);
        if (p && Array.isArray(p.names) && p.names.length) return p.names[0];
      }
    } catch (_e) { /* optional */ }
    return null;
  }

  /**
   * Ask the LLM for a short commentary based on aggregate figures only
   * (top-5 apps and types, spending total, number of top contacts).
   *
   * @param {Object} result - the aggregate produced by `run`.
   * @param {Object} options - `acceptNonLocal` is forwarded to the LLM call.
   * @returns {Promise<*>} whatever `callLlmCommentary` resolves to.
   */
  async _commentary(result, options) {
    const apps = result.byApp.slice(0, 5).map((a) => `${a.app}(${a.count})`).join(", ");
    const types = result.byType.slice(0, 5).map((t) => `${t.type}(${t.count})`).join(", ");
    const msg = `用户跨 ${result.summary.appsActive} 个 app 的数据汇总:
- 共 ${result.summary.totalEvents} 条事件;活跃 app(Top5): ${apps}
- 事件类型(Top5): ${types}
- 跨 app 消费合计: ${result.spending.total} ${result.spending.currency || ""}
- 高频联系人数: ${result.topContacts.length}
请用 3-4 句话,从「为个人决策提供依据」的角度,概括其数字生活重心与可关注点。中文。`;
    return await this.callLlmCommentary([
      { role: "system", content: "你是个人数据中台的跨 app 洞察助手,基于事实给决策参考,克制不臆测。" },
      { role: "user", content: msg },
    ], { acceptNonLocal: options.acceptNonLocal });
  }
}
|
|
156
|
+
|
|
157
|
+
module.exports = { OverviewSkill };
|
package/lib/analysis.js
CHANGED
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
"use strict";
|
|
23
23
|
|
|
24
24
|
const { parseQuery, extractEntityTerm, extractPersonNameCandidate } = require("./query-parser");
|
|
25
|
+
const { OverviewSkill } = require("./analysis-skills/overview");
|
|
25
26
|
const {
|
|
26
27
|
buildPrompt,
|
|
27
28
|
parseCitations,
|
|
@@ -212,6 +213,27 @@ class AnalysisEngine {
|
|
|
212
213
|
}
|
|
213
214
|
}
|
|
214
215
|
|
|
216
|
+
// Optional cross-app overview context (opt-in via options.crossApp) — runs
|
|
217
|
+
// the OverviewSkill aggregation and injects a compact summary so the LLM
|
|
218
|
+
// can answer cross-app / decision questions grounded in ALL apps' data.
|
|
219
|
+
let crossAppOverview;
|
|
220
|
+
if (options.crossApp) {
|
|
221
|
+
try {
|
|
222
|
+
const ov = await new OverviewSkill({ vault: this.vault }).run({
|
|
223
|
+
commentary: false,
|
|
224
|
+
topN: 5,
|
|
225
|
+
...(parsed.timeWindow &&
|
|
226
|
+
Number.isFinite(parsed.timeWindow.since) &&
|
|
227
|
+
Number.isFinite(parsed.timeWindow.until)
|
|
228
|
+
? { since: parsed.timeWindow.since, until: parsed.timeWindow.until }
|
|
229
|
+
: {}),
|
|
230
|
+
});
|
|
231
|
+
crossAppOverview = formatCrossAppOverview(ov);
|
|
232
|
+
} catch (_e) {
|
|
233
|
+
/* overview is best-effort context; never abort the ask */
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
215
237
|
// Build prompt.
|
|
216
238
|
const { messages, factIds, factCount, truncated } = buildPrompt({
|
|
217
239
|
question,
|
|
@@ -223,6 +245,7 @@ class AnalysisEngine {
|
|
|
223
245
|
vaultTotals: this._gatherVaultTotals(),
|
|
224
246
|
amountSummary:
|
|
225
247
|
parsed.intent === "sum-amount" ? this._gatherAmountSummary(parsed) : undefined,
|
|
248
|
+
crossAppOverview,
|
|
226
249
|
});
|
|
227
250
|
|
|
228
251
|
// Telemetry: post-cap prompt size + truncation count. If `truncated` > 0
|
|
@@ -820,8 +843,35 @@ class AnalysisEngine {
|
|
|
820
843
|
}
|
|
821
844
|
}
|
|
822
845
|
|
|
846
|
+
/**
 * Render an OverviewSkill result as a compact multi-line text block for the
 * CROSS_APP_OVERVIEW prompt context. Only aggregate figures are emitted:
 * event/app totals, the first five apps and event types, the spending total
 * (when finite and non-zero) and the first five contacts.
 *
 * @param {Object} ov - OverviewSkill result.
 * @returns {?string} newline-joined summary, or null when `ov` or its
 *   `summary` is missing.
 */
function formatCrossAppOverview(ov) {
  if (!ov || !ov.summary) return null;

  const TOP_LIMIT = 5;
  // "label(count)" pairs for the leading entries, or "无" when there are none.
  const describeTop = (entries, labelKey, countKey) => {
    const leading = (entries || []).slice(0, TOP_LIMIT);
    const rendered = leading.map((entry) => `${entry[labelKey]}(${entry[countKey]})`);
    return rendered.join(", ") || "无";
  };

  const { summary, spending, topContacts } = ov;
  const lines = [];
  lines.push(`共 ${summary.totalEvents} 事件,跨 ${summary.appsActive} 个 app`);
  lines.push(`活跃 app(Top): ${describeTop(ov.byApp, "app", "count")}`);
  lines.push(`事件类型(Top): ${describeTop(ov.byType, "type", "count")}`);

  const hasSpending =
    Boolean(spending) && Number.isFinite(spending.total) && spending.total !== 0;
  if (hasSpending) {
    // trim() drops the trailing space left when no currency is known.
    lines.push(`跨 app 消费合计: ${spending.total} ${spending.currency || ""}`.trim());
  }

  if (Array.isArray(topContacts) && topContacts.length > 0) {
    const contactList = topContacts
      .slice(0, TOP_LIMIT)
      .map((contact) => `${contact.name || contact.personId}(${contact.interactions})`)
      .join(", ");
    lines.push(`高频联系人(Top): ${contactList}`);
  }

  return lines.join("\n");
}
|
|
871
|
+
|
|
823
872
|
module.exports = {
|
|
824
873
|
AnalysisEngine,
|
|
874
|
+
formatCrossAppOverview,
|
|
825
875
|
DEFAULT_MAX_FACTS,
|
|
826
876
|
DEFAULT_MAX_QUERY_LIMIT,
|
|
827
877
|
LATEST_INTENT_FACT_LIMIT,
|